#include <NTL/lip.h>
#include <NTL/ctools.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define MustAlloc(c, len) (!(c) || ((c)[-1] >> 1) < (len))
/* Fast test to determine if allocation is necessary */
static
void zhalt(char *c)
{
fprintf(stderr,"fatal error:\n %s\nexit...\n",c);
fflush(stderr);
abort();
}
#define NTL_GMP_MUL_CROSS (4)
#define NTL_GMP_SQR_CROSS (4)
#define NTL_GMP_DIV_CROSS (4)
#define NTL_GMP_REM_CROSS (4)
#define NTL_GMP_QREM_CROSS (4)
#define NTL_GMP_SQRT_CROSS (2)
#define NTL_GMP_GCD_CROSS (1)
#define NTL_GMP_XGCD_CROSS (1)
#define NTL_GMP_INVMOD_CROSS (1)
#ifdef NTL_GMP_HACK
/* using GMP */
#ifdef NTL_SINGLE_MUL
#error "sorry...not implemented"
#endif
#if (NTL_NBITS != NTL_NBITS_MAX)
#error "sorry...not implemented"
#endif
int _ntl_gmp_hack = 1;
#include <gmp.h>
static mp_limb_t *gspace_data = 0;
static long gspace_size = 0;
static void alloc_gspace(long n)
{
if (n <= gspace_size) return;
if (n <= gspace_size*1.1)
n = ((long) (gspace_size*1.1)) + 10;
if (gspace_data)
gspace_data =
(mp_limb_t *) NTL_REALLOC(gspace_data, n, sizeof(mp_limb_t), 0);
else
gspace_data = (mp_limb_t *) NTL_MALLOC(n, sizeof(mp_limb_t), 0);
if (!gspace_data) zhalt("alloc_gspace: out of memory");
gspace_size = n;
}
#define NTL_GSPACE(n) \
{ long _n = (n); if (_n > gspace_size) alloc_gspace(_n); }
static mpz_t gt_1, gt_2, gt_3, gt_4, gt_5;
static long gt_init = 0;
static
void do_gt_init()
{
gt_init = 1;
mpz_init(gt_1);
mpz_init(gt_2);
mpz_init(gt_3);
mpz_init(gt_4);
mpz_init(gt_5);
}
#define GT_INIT { if (!gt_init) do_gt_init(); }
#include "lip_gmp_aux_impl.h"
/* Check that we really have it...otherwise, some compilers
* only issue warnings for missing functions.
*/
#ifndef HAVE_LIP_GMP_AUX
#error "do not have lip_gmp_aux.h"
#endif
static
void lip_to_mpz(const long *x, mpz_t y)
{
long sx;
long neg;
long gsx;
if (!x || (x[0] == 1 && x[1] == 0)) {
mpz_set_ui(y, 0);
return;
}
sx = x[0];
if (sx < 0) {
neg = 1;
sx = -sx;
}
else
neg = 0;
gsx = L_TO_G(sx);
if (gsx > y->_mp_alloc) _mpz_realloc(y, gsx);
lip_to_gmp(x+1, y->_mp_d, sx);
while (y->_mp_d[gsx-1] == 0) gsx--;
if (neg) gsx = -gsx;
y->_mp_size = gsx;
}
static
void mpz_to_lip(long **x, mpz_t y)
{
long gsy;
long neg;
long sx;
long *xp;
gsy = y->_mp_size;
if (gsy == 0) {
_ntl_zzero(x);
return;
}
if (gsy < 0) {
neg = 1;
gsy = -gsy;
}
else
neg = 0;
sx = G_TO_L(gsy);
xp = *x;
if (MustAlloc(xp, sx)) {
_ntl_zsetlength(x, sx);
xp = *x;
}
gmp_to_lip1(xp+1, y->_mp_d, sx);
while (xp[sx] == 0) sx--;
if (neg) sx = -sx;
xp[0] = sx;
}
void _ntl_gmp_powermod(long **x, long *a, long *e, long *n)
{
long neg = 0;
GT_INIT
lip_to_mpz(a, gt_2);
lip_to_mpz(e, gt_3);
lip_to_mpz(n, gt_4);
if (mpz_sgn(gt_3) < 0) {
mpz_neg(gt_3, gt_3);
neg = 1;
}
mpz_powm(gt_1, gt_2, gt_3, gt_4);
/* This fixes a bug in GMP 3.0.1 */
if (mpz_cmp(gt_1, gt_4) >= 0)
mpz_sub(gt_1, gt_1, gt_4);
if (neg) {
if (!mpz_invert(gt_1, gt_1, gt_4))
zhalt("Modular inverse not defined");
}
mpz_to_lip(x, gt_1);
}
#else
/* not using GMP */
int _ntl_gmp_hack = 0;
#endif
#define DENORMALIZE NTL_FDOUBLE_PRECISION
#define MIN_SETL (4)
/* _ntl_zsetlength allocates a multiple of MIN_SETL digits */
#if (defined(NTL_SINGLE_MUL) && !defined(NTL_FAST_INT_MUL))
#define MulLo(rres,a,b) \
{ \
double _x = ((double) a) * ((double) b) + (DENORMALIZE); \
unsigned long _x_lo; \
NTL_FetchLo(_x_lo, _x); \
rres = _x_lo; \
}
#else
#define MulLo(rres,a,b) rres = (a)*(b)
#endif
/*
* definitions of zaddmulp, zxmulp, zaddmulpsq for the various
* long integer arithmentic implementation options.
*/
#if (defined(NTL_SINGLE_MUL))
#define zaddmulp(a,b,d,t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
unsigned long __lhi, __llo;\
double __lx = ((double) ((_a) + (_t))) + ((double) _b)*((double) _d); \
__lx += (DENORMALIZE); \
NTL_FetchHiLo(__lhi,__llo,__lx);\
__lhi = ((__lhi<<6)|(__llo>>26)) & 0x3FFFFFF; \
__llo &= 0x3FFFFFF; \
(a) = __llo;\
(t) = __lhi;\
}
#define zxmulp(a,b,d,t) \
{ \
long _b = (b), _d = (d), _t = (t); \
unsigned long __lhi, __llo;\
double __lx = ((double) ((_t))) + ((double) _b)*((double) _d); \
__lx += (DENORMALIZE); \
NTL_FetchHiLo(__lhi,__llo,__lx);\
__lhi = ((__lhi<<6)|(__llo>>26)) & 0x3FFFFFF; \
__llo &= 0x3FFFFFF; \
(a) = __llo;\
(t) = __lhi;\
}
#define zaddmulpsq(a,b,t) \
{ \
long _a = (a), _b = (b); \
unsigned long __lhi, __llo; \
double __lx = ((double) (_a)) + ((double) _b)*((double) _b); \
__lx += (DENORMALIZE); \
NTL_FetchHiLo(__lhi,__llo,__lx);\
__lhi = ((__lhi<<6)|(__llo>>26)) & 0x3FFFFFF; \
__llo &= 0x3FFFFFF; \
(a) = __llo;\
(t) = __lhi;\
}
#elif (defined(NTL_LONG_LONG))
#if (!defined(NTL_CLEAN_INT))
/*
* One might get slightly better code with this version.
*/
#define zaddmulp(a, b, d, t) { \
NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + ((t)+(a)); \
(a) = ((long)(_pp)) & NTL_RADIXM; \
(t) = (long) (_pp >> NTL_NBITS); \
}
#define zxmulp(a, b, d, t) { \
NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + (t); \
(a) = ((long)(_pp)) & NTL_RADIXM; \
(t) = (long) (_pp >> NTL_NBITS); \
}
#define zaddmulpsq(a,b,t) { \
NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (b)) + (a); \
(a) = ((long)(_pp)) & NTL_RADIXM; \
(t) = (long) (_pp >> NTL_NBITS); \
}
#else
/*
* This version conforms to the language standard when d is non-negative.
* Some compilers may emit sub-optimal code, though.
*/
#define zaddmulp(a, b, d, t) { \
NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + ((t)+(a)); \
(a) = (long) (_pp & NTL_RADIXM); \
(t) = (long) (_pp >> NTL_NBITS); \
}
#define zxmulp(a, b, d, t) { \
NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (d)) + (t); \
(a) = (long) (_pp & NTL_RADIXM); \
(t) = (long) (_pp >> NTL_NBITS); \
}
#define zaddmulpsq(a,b,t) { \
NTL_LL_TYPE _pp = ((NTL_LL_TYPE) (b)) * ((NTL_LL_TYPE) (b)) + (a); \
(a) = (long) (_pp & NTL_RADIXM); \
(t) = (long) (_pp >> NTL_NBITS); \
}
#endif
#elif (defined(NTL_AVOID_FLOAT))
#define zaddmulp( a, b, d, t) { \
unsigned long _b1 = b & NTL_RADIXROOTM; \
unsigned long _d1 = d & NTL_RADIXROOTM; \
unsigned long _bd,_b1d1,_m,_aa= (a) + (t); \
unsigned long _ld = (d>>NTL_NBITSH); \
unsigned long _lb = (b>>NTL_NBITSH); \
\
_bd=_lb*_ld; \
_b1d1=_b1*_d1; \
_m=(_lb+_b1)*(_ld+_d1) - _bd - _b1d1; \
_aa += ( _b1d1+ ((_m&NTL_RADIXROOTM)<<NTL_NBITSH)); \
(t) = (_aa >> NTL_NBITS) + _bd + (_m>>NTL_NBITSH); \
(a) = _aa & NTL_RADIXM; \
}
#define zxmulp( a, b, d, t) { \
unsigned long _b1 = b & NTL_RADIXROOTM; \
unsigned long _d1 = d & NTL_RADIXROOTM; \
unsigned long _bd,_b1d1,_m,_aa= (t); \
unsigned long _ld = (d>>NTL_NBITSH); \
unsigned long _lb = (b>>NTL_NBITSH); \
\
_bd=_lb*_ld; \
_b1d1=_b1*_d1; \
_m=(_lb+_b1)*(_ld+_d1) - _bd - _b1d1; \
_aa += ( _b1d1+ ((_m&NTL_RADIXROOTM)<<NTL_NBITSH)); \
(t) = (_aa >> NTL_NBITS) + _bd + (_m>>NTL_NBITSH); \
(a) = _aa & NTL_RADIXM; \
}
#define zaddmulpsq(_a, _b, _t) \
{ \
long _lb = (_b); \
long _b1 = (_b) & NTL_RADIXROOTM; \
long _aa = (_a) + _b1 * _b1; \
\
_b1 = (_b1 * (_lb >>= NTL_NBITSH) << 1) + (_aa >> NTL_NBITSH); \
_aa = (_aa & NTL_RADIXROOTM) + ((_b1 & NTL_RADIXROOTM) << NTL_NBITSH); \
(_t) = _lb * _lb + (_b1 >> NTL_NBITSH) + (_aa >> NTL_NBITS); \
(_a) = (_aa & NTL_RADIXM); \
}
#else
/* default long integer arithemtic */
/* various "software pipelining" routines are also defined */
/*
* The macros CARRY_TYPE and CARRY_CONV are only used in the submul
* logic.
*/
#if (defined(NTL_CLEAN_INT))
#define CARRY_TYPE unsigned long
#define CARRY_CONV(x) (-((long)(-x)))
#else
#define CARRY_TYPE long
#define CARRY_CONV(x) (x)
#endif
#if (NTL_BITS_PER_LONG <= NTL_NBITS + 2)
#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT))
/* value right-shifted is -1..1 */
#define zaddmulp(a, b, d, t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ); \
_t2 = _t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS); \
_t1 = (_t1 & NTL_RADIXM) + _a +_t; \
(t) = _t2 + (((unsigned long)_t1) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zxmulp(a, b, d, t) \
{ \
long _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d + _t; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = _t2 + (((unsigned long)(_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
/* value shifted is -1..1 */
#define zaddmulpsq(a, b, t) \
{ \
long _a = (a), _b = (b); \
long _t1 = _b*_b; \
long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ); \
_t2 = _t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS); \
_t1 = (_t1 & NTL_RADIXM) + _a; \
(t) = _t2 + (((unsigned long)_t1) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
/*
* In the following definition of zam_init, the value _ds is computed so that
* it is slightly bigger than s*NTL_RADIX_INV. This has the consequence that
* the value _hi is equal to floor(_b*_s/NTL_RADIX) or
* floor(_b*_s/NTL_RADIX) + 1, assuming only that (1) conversion of "small"
* integer to doubles is exact, (2) multiplication by powers of 2 is exact, and
* (3) multiplication of two general doubles yields a result with relative
* error 1/2^{NTL_DOUBLE_PRECISION-1}. These assumptions are very
* conservative, and in fact, the IEEE floating point standard would guarantee
* this result *without* making _ds slightly bigger.
*/
#define zam_decl double _ds; long _hi, _lo, _s;
#define zam_init(b,s) \
{ \
long _b = (b); \
_s = (s); \
_ds = ((_s << 1)+1)*(NTL_FRADIX_INV/2.0); \
_lo = _b*_s; \
_hi = (long) (((double) _b)*_ds); \
}
/* value shifted is 0..3 */
#define zam_loop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _lo + _a + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is -1..+1 */
#define zsx_loop(a,t,nb) \
{ \
long _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _lo + _t; \
(t) = _hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is -2..+1 */
#define zam_subloop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _a + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..3 */
#define zam_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + _a + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* value shifted is -1..+1 */
#define zsx_finish(a,t) \
{ \
long _t = (t); \
_lo = _lo + _t; \
(t) = _hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* value shifted is -2..+1 */
#define zam_subfinish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _a + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
}
#elif (!defined(NTL_CLEAN_INT))
/* right shift is not arithmetic */
/* value right-shifted is 0..2 */
#define zaddmulp(a, b, d, t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
_t2 = _t2 + ( ((unsigned long) (_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS ); \
_t1 = (_t1 & NTL_RADIXM) + _a +_t; \
(t) = _t2 + (((unsigned long)_t1) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zxmulp(a, b, d, t) \
{ \
long _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d + _t; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = _t2 + (((unsigned long)(_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
/* value shifted is 0..2 */
#define zaddmulpsq(a, b, t) \
{ \
long _a = (a), _b = (b); \
long _t1 = _b*_b; \
long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \
_t2 = _t2 + ( ((unsigned long) (_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS ); \
_t1 = (_t1 & NTL_RADIXM) + _a; \
(t) = _t2 + (((unsigned long)_t1) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zam_decl double _ds; long _hi, _lo, _s;
#define zam_init(b,s) \
{ \
long _b = (b); \
_s = (s); \
_ds = ((_s << 1)+1)*(NTL_FRADIX_INV/2.0); \
_lo = _b*_s; \
_hi = (long) (((double) _b)*_ds); \
}
/* value shifted is 0..3 */
#define zam_loop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _lo + _a + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..2 */
#define zsx_loop(a,t,nb) \
{ \
long _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _lo + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..3 */
#define zam_subloop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_hi += 2; \
_lo = _a + _t - _lo; \
(t) = (((unsigned long)(_lo + (_hi<<NTL_NBITS))) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..3 */
#define zam_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + _a + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* value shifted is 0..2 */
#define zsx_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* value shifted is 0..3 */
#define zam_subfinish(a,t) \
{ \
long _a = (a), _t = (t); \
_hi += 2; \
_lo = _a + _t - _lo; \
(t) = (((unsigned long)(_lo + (_hi<<NTL_NBITS))) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
}
#else
/* clean int version */
/* value right-shifted is 0..2 */
#define zaddmulp(a, b, d, t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d); \
unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
_t2 = _t2 + ( (_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS ); \
_t1 = (_t1 & NTL_RADIXM) + ((unsigned long) _a) + ((unsigned long) _t); \
(t) = (long) (_t2 + (_t1 >> NTL_NBITS)); \
(a) = (long) (_t1 & NTL_RADIXM); \
}
#define zxmulp(a, b, d, t) \
{ \
long _b = (b), _d = (d), _t = (t); \
unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d) + ((unsigned long) _t); \
unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_t1 & NTL_RADIXM); \
}
/* value shifted is 0..2 */
#define zaddmulpsq(a, b, t) \
{ \
long _a = (a), _b = (b); \
unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _b); \
unsigned long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \
_t2 = _t2 + ( (_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS ); \
_t1 = (_t1 & NTL_RADIXM) + ((unsigned long) _a); \
(t) = (long) (_t2 + (_t1 >> NTL_NBITS)); \
(a) = (long) (_t1 & NTL_RADIXM); \
}
#define zam_decl double _ds; long _s; unsigned long _hi, _lo;
#define zam_init(b,s) \
{ \
long _b = (b); \
_s = (s); \
_ds = ((_s << 1)+1)*(NTL_FRADIX_INV/2.0); \
_lo = ((unsigned long) _b)*((unsigned long) _s); \
_hi = (long) (((double) _b)*_ds); \
}
/* value shifted is 0..3 */
#define zam_loop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
unsigned long _vv; \
double _yy; \
_vv = ((unsigned long) _nb)*((unsigned long)_s); \
_yy = ((double) _nb)*_ds; \
_lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \
_hi--; \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..2 */
#define zsx_loop(a,t,nb) \
{ \
long _t = (t), _nb = (nb); \
unsigned long _vv; \
double _yy; \
_vv = ((unsigned long) _nb)*((unsigned long) _s); \
_yy = ((double) _nb)*_ds; \
_lo = _lo + ((unsigned long) _t); \
_hi--; \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..3 */
#define zam_subloop(a,t,nb) \
{ \
long _a = (a); unsigned long _t = (t); long _nb = (nb); \
unsigned long _vv; \
double _yy; \
_vv = ((unsigned long) _nb)*((unsigned long) _s); \
_yy = ((double) _nb)*_ds; \
_hi += 2; \
_lo = ((unsigned long) _a) + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = (long) (_lo & NTL_RADIXM); \
_lo = _vv; \
_hi = (long) _yy; \
}
/* value shifted is 0..3 */
#define zam_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \
_hi--; \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
}
/* value shifted is 0..2 */
#define zsx_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + ((unsigned long) _t); \
_hi--; \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
}
/* value shifted is 0..3 */
#define zam_subfinish(a,t) \
{ \
long _a = (a); unsigned long _t = (t); \
_hi += 2; \
_lo = ((unsigned long) _a) + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = (long) (_lo & NTL_RADIXM); \
}
#endif
/* end of arithmemtic-right-shift if-then else */
#else
/* NTL_BITS_PER_LONG > NTL_NBITS + 2, and certain optimizations can be
made. Useful on 64-bit machines. */
#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT))
/* shift is -1..+3 */
#define zaddmulp(a, b, d, t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d + _a + _t; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ); \
(t) = _t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zxmulp(a, b, d, t) \
{ \
long _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d + _t; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ); \
(t) = _t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
/* shift is -1..+2 */
#define zaddmulpsq(a, b, t) \
{ \
long _a = (a), _b = (b), _t = (t); \
long _t1 = _b*_b + _a; \
long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ); \
(t) = _t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zam_decl double _ds; long _hi, _lo, _s;
#define zam_init(b,s) \
{ \
long _b = (b); \
_s = (s); \
_ds = _s*NTL_FRADIX_INV; \
_lo = _b*_s; \
_hi = (long) (((double) _b)*_ds); \
}
/* shift is -1..+3 */
#define zam_loop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _lo + _a + _t; \
(t) = _hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is -1..+2 */
#define zsx_loop(a,t,nb) \
{ \
long _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _lo + _t; \
(t) = _hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is -3..+1 */
#define zam_subloop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_lo = _a + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is -1..+3 */
#define zam_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + _a + _t; \
(t) = _hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* shift is -1..+2 */
#define zsx_finish(a,t) \
{ \
long _t = (t); \
_lo = _lo + _t; \
(t) = _hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* shift is -3..+1 */
#define zam_subfinish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _a + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
}
#elif (!defined(NTL_CLEAN_INT))
/* right shift is not arithmetic */
/* shift is 0..4 */
#define zaddmulp(a, b, d, t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d + _a + _t; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = _t2 + (((unsigned long)(_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zxmulp(a, b, d, t) \
{ \
long _b = (b), _d = (d), _t = (t); \
long _t1 = _b*_d + _t; \
long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = _t2 + (((unsigned long)(_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
/* shift is 0..3 */
#define zaddmulpsq(a, b, t) \
{ \
long _a = (a), _b = (b), _t = (t); \
long _t1 = _b*_b + _a; \
long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \
(t) = _t2 + (((unsigned long)(_t1 - (_t2 << NTL_NBITS))) >> NTL_NBITS); \
(a) = _t1 & NTL_RADIXM; \
}
#define zam_decl double _ds; long _hi, _lo, _s;
#define zam_init(b,s) \
{ \
long _b = (b); \
_s = (s); \
_ds = _s*NTL_FRADIX_INV; \
_lo = _b*_s; \
_hi = (long) (((double) _b)*_ds); \
}
/* shift is 0..4 */
#define zam_loop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_hi--; \
_lo = _lo + _a + _t; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is 0..3 */
#define zsx_loop(a,t,nb) \
{ \
long _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_hi--; \
_lo = _lo + _t; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is 0..4 */
#define zam_subloop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
long _vv; \
double _yy; \
_vv = _nb*_s; \
_yy = ((double) _nb)*_ds; \
_hi += 3; \
_lo = _a + _t - _lo; \
(t) = (((unsigned long)(_lo + (_hi<<NTL_NBITS))) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is 0..4 */
#define zam_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + _a + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* shift is 0..3 */
#define zsx_finish(a,t) \
{ \
long _t = (t); \
_lo = _lo + _t; \
_hi--; \
(t) = _hi + (((unsigned long)(_lo - (_hi<<NTL_NBITS))) >> NTL_NBITS); \
(a) = _lo & NTL_RADIXM; \
}
/* shift is 0..4 */
#define zam_subfinish(a,t) \
{ \
long _a = (a), _t = (t); \
_hi += 3; \
_lo = _a + _t - _lo; \
(t) = (((unsigned long)(_lo + (_hi<<NTL_NBITS))) >> NTL_NBITS) - _hi; \
(a) = _lo & NTL_RADIXM; \
}
#else
/* clean int version */
/* shift is 0..4 */
#define zaddmulp(a, b, d, t) \
{ \
long _a = (a), _b = (b), _d = (d), _t = (t); \
unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d) + ((unsigned long) _a) + ((unsigned long) _t); \
unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_t1 & NTL_RADIXM); \
}
#define zxmulp(a, b, d, t) \
{ \
long _b = (b), _d = (d), _t = (t); \
unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _d) + ((unsigned long) _t); \
unsigned long _t2 = (long) ( ((double) _b)*(((double) _d)*NTL_FRADIX_INV) ) - 1; \
(t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_t1 & NTL_RADIXM); \
}
/* shift is 0..3 */
#define zaddmulpsq(a, b, t) \
{ \
long _a = (a), _b = (b), _t = (t); \
unsigned long _t1 = ((unsigned long) _b)*((unsigned long) _b) + ((unsigned long) _a); \
unsigned long _t2 = (long) ( ((double) _b)*(((double) _b)*NTL_FRADIX_INV) ) - 1; \
(t) = (long) (_t2 + ((_t1 - (_t2 << NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_t1 & NTL_RADIXM); \
}
#define zam_decl double _ds; long _s; unsigned long _hi, _lo;
#define zam_init(b,s) \
{ \
long _b = (b); \
_s = (s); \
_ds = _s*NTL_FRADIX_INV; \
_lo = ((unsigned long) _b)*((unsigned long) _s); \
_hi = (long) (((double) _b)*_ds); \
}
/* shift is 0..4 */
#define zam_loop(a,t,nb) \
{ \
long _a = (a), _t = (t), _nb = (nb); \
unsigned long _vv; \
double _yy; \
_vv = ((unsigned long) _nb)*((unsigned long) _s); \
_yy = ((double) _nb)*_ds; \
_hi--; \
_lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is 0..3 */
#define zsx_loop(a,t,nb) \
{ \
long _t = (t), _nb = (nb); \
unsigned long _vv; \
double _yy; \
_vv = ((unsigned long) _nb)*((unsigned long) _s); \
_yy = ((double) _nb)*_ds; \
_hi--; \
_lo = _lo + ((unsigned long) _t); \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is 0..4 */
#define zam_subloop(a,t,nb) \
{ \
long _a = (a); unsigned long _t = (t); long _nb = (nb); \
unsigned long _vv; \
double _yy; \
_vv = ((unsigned long) _nb)*((unsigned long) _s); \
_yy = ((double) _nb)*_ds; \
_hi += 3; \
_lo = ((unsigned long) _a) + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = (long) (_lo & NTL_RADIXM); \
_lo = _vv; \
_hi = (long) _yy; \
}
/* shift is 0..4 */
#define zam_finish(a,t) \
{ \
long _a = (a), _t = (t); \
_lo = _lo + ((unsigned long) _a) + ((unsigned long) _t); \
_hi--; \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = _lo & NTL_RADIXM; \
}
/* shift is 0..3 */
#define zsx_finish(a,t) \
{ \
long _t = (t); \
_lo = _lo + ((unsigned long) _t); \
_hi--; \
(t) = (long) (_hi + ((_lo - (_hi<<NTL_NBITS)) >> NTL_NBITS)); \
(a) = (long) (_lo & NTL_RADIXM); \
}
/* shift is 0..4 */
#define zam_subfinish(a,t) \
{ \
long _a = (a); unsigned long _t = (t); \
_hi += 3; \
_lo = ((unsigned long) _a) + _t - _lo; \
(t) = ((_lo + (_hi<<NTL_NBITS)) >> NTL_NBITS) - _hi; \
(a) = (long) (_lo & NTL_RADIXM); \
}
#endif
/* end of arithmetic-right-shift if-then-else */
#endif
/* end of "NTL_BITS_PER_LONG <= NTL_NBITS + 2" if-then-else */
#endif
/* end of long-integer-implementation if-then-else */
static
void zaddmulone(long *lama, long *lamb)
{
long lami;
long lams = 0;
lams = 0;
for (lami = (*lamb++); lami > 0; lami--) {
lams += (*lama + *lamb++);
*lama++ = lams & NTL_RADIXM;
lams >>= NTL_NBITS;
}
*lama += lams;
}
#if (NTL_ARITH_RIGHT_SHIFT && !defined(NTL_CLEAN_INT))
static
void zsubmulone(long *lama, long *lamb)
{
long lami;
long lams = 0;
lams = 0;
for (lami = (*lamb++); lami > 0; lami--) {
lams += (*lama - *lamb++);
*lama++ = lams & NTL_RADIXM;
lams >>= NTL_NBITS;
}
*lama += lams;
}
#else
static
void zsubmulone(long *lama, long *lamb)
{
long lami;
long lams = 0;
lams = 0;
for (lami = (*lamb++); lami > 0; lami--) {
lams = *lama - *lamb++ - lams;
*lama++ = lams & NTL_RADIXM;
lams = (lams < 0);
}
*lama -= lams;
}
#endif
/*
* definitions of zaddmul, zsxmul, zaddmulsq for the various
* long integer implementation options.
*/
#if (defined(NTL_SINGLE_MUL))
static
void zaddmul(long ams, long *ama, long *amb)
{
long carry = 0;
long i = (*amb++);
double dams = (double) ams;
double xx;
double yy;
unsigned long lo_wd, lo;
unsigned long hi_wd, hi;
xx = ((double) (*amb++))*dams + DENORMALIZE;
for (; i > 1; i--) {
yy = ((double) (*amb++))*dams +DENORMALIZE;
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = lo + (*ama) + carry;
*ama = lo & 0x3FFFFFF;
carry = hi + (lo >> 26);
ama++;
xx = yy;
}
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = lo + (*ama) + carry;
*ama = lo & 0x3FFFFFF;
carry = hi + (lo >> 26);
ama++;
*ama += carry;
}
static
void zsxmul(long ams, long *ama, long *amb)
{
long carry = 0;
long i = (*amb++);
double dams = (double) ams;
double xx;
double yy;
unsigned long lo_wd, lo;
unsigned long hi_wd, hi;
xx = ((double) (*amb++))*dams + DENORMALIZE;
for (; i > 1; i--) {
yy = ((double) (*amb++))*dams +DENORMALIZE;
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = lo + carry;
*ama = lo & 0x3FFFFFF;
carry = hi + (lo >> 26);
ama++;
xx = yy;
}
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = lo + carry;
*ama = lo & 0x3FFFFFF;
carry = hi + (lo >> 26);
ama++;
*ama = carry;
}
static
void zaddmulsq(long ams, long *ama, long *amb)
{
long carry = 0;
long i = ams;
double dams = (double) (*amb++);
double xx;
double yy;
unsigned long lo, lo_wd;
unsigned long hi, hi_wd;
xx = ((double) (*amb++))*dams + DENORMALIZE;
for (; i > 1; i--) {
yy = ((double) (*amb++))*dams + DENORMALIZE;
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = lo + (*ama) + carry;
*ama = lo & 0x3FFFFFF;
carry = hi + (lo >> 26);
ama++;
xx = yy;
}
if (i==1) {
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = lo + (*ama) + carry;
*ama = lo & 0x3FFFFFF;
carry = hi + (lo >> 26);
ama++;
}
*ama += carry;
}
#elif (defined(NTL_AVOID_FLOAT) || defined(NTL_LONG_LONG))
static
void zaddmul(long lams, long *lama, long *lamb)
{
long lami;
long lamcarry = 0;
for (lami = (*lamb++); lami > 0; lami--)
{
zaddmulp(*lama, *lamb, lams, lamcarry);
lama++;
lamb++;
}
*lama += lamcarry;
}
static
void zsxmul(long lams, long *lama, long *lamb)
{
long lami;
long lamcarry = 0;
for (lami = (*lamb++); lami > 0; lami--)
{
zxmulp(*lama, *lamb, lams, lamcarry);
lama++;
lamb++;
}
*lama = lamcarry;
}
static
void zaddmulsq(long lsqi, long *lsqa, long *lsqb)
{
long lsqs = *(lsqb);
long lsqcarry = 0;
lsqb++;
for (; lsqi > 0; lsqi--)
{
zaddmulp(*lsqa, *lsqb, lsqs, lsqcarry);
lsqa++;
lsqb++;
}
*lsqa += lsqcarry;
}
#else
/* default long integer arithmetic */
static
void zaddmul(long lams, long *lama, long *lamb)
{
long lami = (*lamb++)-1;
long lamcarry = 0;
zam_decl;
zam_init(*lamb, lams);
lamb++;
for (; lami > 0; lami--) {
zam_loop(*lama, lamcarry, *lamb);
lama++;
lamb++;
}
zam_finish(*lama, lamcarry);
lama++;
*lama += lamcarry;
}
static
void zsxmul(long lams, long *lama, long *lamb)
{
long lami = (*lamb++)-1;
long lamcarry = 0;
zam_decl;
zam_init(*lamb, lams);
lamb++;
for (; lami > 0; lami--) {
zsx_loop(*lama, lamcarry, *lamb);
lama++;
lamb++;
}
zsx_finish(*lama, lamcarry);
lama++;
*lama = lamcarry;
}
static
void zaddmulsq(long lsqi, long *lsqa, long *lsqb)
{
long lsqs;
long lsqcarry;
zam_decl
if (lsqi <= 0) return;
lsqs = *lsqb;
lsqcarry = 0;
lsqb++;
zam_init(*lsqb, lsqs);
lsqb++;
lsqi--;
for (; lsqi > 0; lsqi--) {
zam_loop(*lsqa, lsqcarry, *lsqb);
lsqa++;
lsqb++;
}
zam_finish(*lsqa, lsqcarry);
lsqa++;
*lsqa += lsqcarry;
}
#endif
/*
* definition of zsubmul for the various long integer implementation options.
* Note that zsubmul is only called with a positive first argument.
*/
#if (defined(NTL_SINGLE_MUL))
#if (NTL_ARITH_RIGHT_SHIFT)
static
void zsubmul(long ams, long *ama, long *amb)
{
long carry = 0;
long i = (*amb++);
double dams = (double) ams;
double xx;
double yy;
unsigned long lo_wd, lo;
unsigned long hi_wd, hi;
xx = ((double) (*amb++))*dams + DENORMALIZE;
for (; i > 1; i--) {
yy = ((double) (*amb++))*dams +DENORMALIZE;
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = (*ama) + carry - lo;
*ama = lo & 0x3FFFFFF;
carry = (((long)lo) >> 26) - hi;
ama++;
xx = yy;
}
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = (*ama) + carry - lo;
*ama = lo & 0x3FFFFFF;
carry = (((long)lo) >> 26) - hi;
ama++;
*ama += carry;
}
#else
static
void zsubmul(long ams, long *ama, long *amb)
{
long carry = 0;
long i = (*amb++);
double dams = (double) ams;
double xx;
double yy;
unsigned long lo_wd, lo;
unsigned long hi_wd, hi;
xx = ((double) (*amb++))*dams + DENORMALIZE;
for (; i > 1; i--) {
yy = ((double) (*amb++))*dams +DENORMALIZE;
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = (*ama) + carry - lo;
*ama = lo & 0x3FFFFFF;
carry = ((lo + (1L << 27)) >> 26) - hi - 2;
ama++;
xx = yy;
}
NTL_FetchHiLo(hi_wd, lo_wd, xx);
lo = lo_wd & 0x3FFFFFF;
hi = ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
lo = (*ama) + carry - lo;
*ama = lo & 0x3FFFFFF;
carry = ((lo + (1L << 27)) >> 26) - hi - 2;
ama++;
*ama += carry;
}
#endif
#elif (defined(NTL_AVOID_FLOAT) || (defined(NTL_LONG_LONG) && defined(NTL_CLEAN_INT)))
static void
zsubmul(
long r,
_ntl_verylong a,
_ntl_verylong b
)
{
long rd = NTL_RADIX - r;
long i;
long carry = NTL_RADIX;
for (i = (*b++); i > 0; i--)
{
zaddmulp(*a, *b, rd, carry);
a++;
carry += NTL_RADIXM - (*b++);
}
*a += carry - NTL_RADIX; /* unnormalized */
}
#elif (defined(NTL_LONG_LONG))
/*
* NOTE: the implementation of zaddmulp for the NTL_LONG_LONG option
* will work on most machines even when the single-precision
* multiplicand is negative; however, the C language standard does
* not guarantee correct behaviour in this case, which is why the above
* implementation is used when NTL_CLEAN_INT is set.
*/
static
void zsubmul(long lams, long *lama, long *lamb)
{
long lami;
long lamcarry = 0;
lams = -lams;
for (lami = (*lamb++); lami > 0; lami--)
{
zaddmulp(*lama, *lamb, lams, lamcarry);
lama++;
lamb++;
}
*lama += lamcarry;
}
#else
/* default long integer arithmetic */
static
void zsubmul(long lams, long *lama, long *lamb)
{
long lami = (*lamb++)-1;
CARRY_TYPE lamcarry = 0;
zam_decl;
zam_init(*lamb, lams);
lamb++;
for (; lami > 0; lami--) {
zam_subloop(*lama, lamcarry, *lamb);
lama++;
lamb++;
}
zam_subfinish(*lama, lamcarry);
lama++;
*lama += CARRY_CONV(lamcarry);
}
#endif
/*
*
* zdiv21 returns quot, numhigh so
*
* quot = (numhigh*NTL_RADIX + numlow)/denom;
* numhigh = (numhigh*NTL_RADIX + numlow)%denom;
* Assumes 0 <= numhigh < denom < NTL_RADIX and 0 <= numlow < NTL_RADIX.
*/
#if (defined(NTL_CLEAN_INT))
/*
* This "clean" version relies on the guaranteed semantics of
* unsigned integer arithmetic.
*/
#define zdiv21(numhigh, numlow, denom, deninv, quot) \
{ \
unsigned long udenom = denom; \
unsigned long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) + \
(double) (numlow)) * (deninv)); \
unsigned long lr21 = (((unsigned long) numhigh) << NTL_NBITS) + \
((unsigned long) numlow) - udenom*lq21 ; \
\
if (lr21 >> (NTL_BITS_PER_LONG-1)) { \
lq21--; \
lr21 += udenom; \
} \
else if (lr21 >= udenom) { \
lr21 -= udenom; \
lq21++; \
} \
quot = (long) lq21; \
numhigh = (long) lr21; \
}
#else
/*
* This "less clean" version relies on wrap-around semantics for
* signed integer arithmetic.
*/
#define zdiv21(numhigh, numlow, denom, deninv, quot) \
{ \
long lr21; \
long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) \
+ (double) (numlow)) * (deninv)); \
long lp21; \
MulLo(lp21, lq21, denom); \
lr21 = (numhigh << NTL_NBITS) + numlow - lp21; \
if (lr21 < 0) { \
lq21--; \
lr21 += denom; \
} \
else if (lr21 >= denom) { \
lr21 -= denom; \
lq21++; \
} \
quot = lq21; \
numhigh = lr21; \
}
#endif
/*
* zrem21 behaves just like zdiv21, except the only the remainder is computed.
*/
#if (defined(NTL_CLEAN_INT) || (defined(NTL_AVOID_BRANCHING) && !NTL_ARITH_RIGHT_SHIFT))
#define NTL_CLEAN_SPMM
#endif
#if (defined(NTL_CLEAN_SPMM) && !defined(NTL_AVOID_BRANCHING))
#define zrem21(numhigh, numlow, denom, deninv) \
{ \
unsigned long udenom = denom; \
unsigned long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) + \
(double) (numlow)) * (deninv)); \
unsigned long lr21 = (((unsigned long) numhigh) << NTL_NBITS) + \
((unsigned long) numlow) - udenom*lq21 ; \
\
if (lr21 >> (NTL_BITS_PER_LONG-1)) { \
lr21 += udenom; \
} \
else if (lr21 >= udenom) { \
lr21 -= udenom; \
} \
numhigh = (long) lr21; \
}
#elif (defined(NTL_CLEAN_SPMM) && defined(NTL_AVOID_BRANCHING))
#define zrem21(numhigh, numlow, denom, deninv) \
{ \
unsigned long udenom = denom; \
unsigned long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) + \
(double) (numlow)) * (deninv)); \
unsigned long lr21 = (((unsigned long) numhigh) << NTL_NBITS) + \
((unsigned long) numlow) - udenom*lq21 ; \
lr21 += (-(lr21 >> (NTL_BITS_PER_LONG-1))) & udenom; \
lr21 -= udenom; \
lr21 += (-(lr21 >> (NTL_BITS_PER_LONG-1))) & udenom; \
numhigh = (long) lr21; \
}
#elif (NTL_ARITH_RIGHT_SHIFT && defined(NTL_AVOID_BRANCHING))
#define zrem21(numhigh, numlow, denom, deninv) \
{ \
long lr21; \
long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) \
+ (double) (numlow)) * (deninv)); \
long lp21; \
MulLo(lp21, lq21, denom); \
lr21 = (numhigh << NTL_NBITS) + numlow - lp21; \
lr21 += (lr21 >> (NTL_BITS_PER_LONG-1)) & denom; \
lr21 -= denom; \
lr21 += (lr21 >> (NTL_BITS_PER_LONG-1)) & denom; \
numhigh = lr21; \
}
#else
#define zrem21(numhigh, numlow, denom, deninv) \
{ \
long lr21; \
long lq21 = (long) (((NTL_FRADIX * (double) (numhigh)) \
+ (double) (numlow)) * (deninv)); \
long lp21; \
MulLo(lp21, lq21, denom); \
lr21 = (numhigh << NTL_NBITS) + numlow - lp21; \
if (lr21 < 0) lr21 += denom; \
else if (lr21 >= denom) lr21 -= denom; \
numhigh = lr21; \
}
#endif
void _ntl_zsetlength(_ntl_verylong *v, long len)
{
_ntl_verylong x = *v;
if (len < 0)
zhalt("negative size allocation in _ntl_zsetlength");
if (NTL_OVERFLOW(len, NTL_NBITS, 0))
zhalt("size too big in _ntl_zsetlength");
#ifdef L_TO_G_CHECK_LEN
/* this makes sure that numbers don't get too big for GMP */
if (len >= (1L << (NTL_BITS_PER_INT-4)))
zhalt("size too big for GMP");
#endif
if (x) {
long oldlen = x[-1];
long fixed = oldlen & 1;
oldlen = oldlen >> 1;
if (fixed) {
if (len > oldlen)
zhalt("internal error: can't grow this _ntl_verylong");
else
return;
}
if (len <= oldlen) return;
len++; /* always allocate at least one more than requested */
oldlen = (long) (oldlen * 1.2); /* always increase by at least 20% */
if (len < oldlen)
len = oldlen;
/* round up to multiple of MIN_SETL */
len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL;
/* test len again */
if (NTL_OVERFLOW(len, NTL_NBITS, 0))
zhalt("size too big in _ntl_zsetlength");
x[-1] = len << 1;
if (!(x = (_ntl_verylong)NTL_REALLOC(&(x[-1]),
len, sizeof(long), 2*sizeof(long)))) {
zhalt("reallocation failed in _ntl_zsetlength");
}
}
else {
len++;
len = ((len+(MIN_SETL-1))/MIN_SETL)*MIN_SETL;
/* test len again */
if (NTL_OVERFLOW(len, NTL_NBITS, 0))
zhalt("size too big in _ntl_zsetlength");
if (!(x = (_ntl_verylong)NTL_MALLOC(len,
sizeof(long), 2*sizeof(long)))) {
zhalt("allocation failed in _ntl_zsetlength");
}
x[0] = len << 1;
x[1] = 1;
x[2] = 0;
}
*v = x+1;
}
void _ntl_zfree(_ntl_verylong *x)
{
_ntl_verylong y;
if (!(*x))
return;
if ((*x)[-1] & 1)
zhalt("Internal error: can't free this _ntl_verylong");
y = (*x - 1);
free((void*)y);
*x = 0;
}
long _ntl_zround_correction(_ntl_verylong a, long k, long residual)
{
long direction;
long p;
long sgn;
long bl;
long wh;
long i;
if (a[0] > 0)
sgn = 1;
else
sgn = -1;
p = k - 1;
bl = (p/NTL_NBITS);
wh = 1L << (p - NTL_NBITS*bl);
bl++;
if (a[bl] & wh) {
/* bit is 1...we have to see if lower bits are all 0
in order to implement "round to even" */
if (a[bl] & (wh - 1))
direction = 1;
else {
i = bl - 1;
while (i > 0 && a[i] == 0) i--;
if (i > 0)
direction = 1;
else
direction = 0;
}
/* use residual to break ties */
if (direction == 0 && residual != 0) {
if (residual == sgn)
direction = 1;
else
direction = -1;
}
if (direction == 0) {
/* round to even */
wh = wh << 1;
if (wh == NTL_RADIX) {
wh = 1;
bl++;
}
if (a[bl] & wh)
direction = 1;
else
direction = -1;
}
}
else
direction = -1;
if (direction == 1)
return sgn;
return 0;
}
double _ntl_zdoub_aux(_ntl_verylong n)
{
double res;
long i;
if (!n)
return ((double) 0);
if ((i = n[0]) < 0)
i = -i;
res = (double) (n[i--]);
for (; i; i--)
res = res * NTL_FRADIX + (double) (n[i]);
if (n[0] > 0)
return (res);
return (-res);
}
double _ntl_zdoub(_ntl_verylong n)
{
static _ntl_verylong tmp = 0;
long s;
long shamt;
long correction;
double x;
s = _ntl_z2log(n);
shamt = s - NTL_DOUBLE_PRECISION;
if (shamt <= 0)
return _ntl_zdoub_aux(n);
_ntl_zrshift(n, shamt, &tmp);
correction = _ntl_zround_correction(n, shamt, 0);
if (correction) _ntl_zsadd(tmp, correction, &tmp);
x = _ntl_zdoub_aux(tmp);
x = _ntl_ldexp(x, shamt);
return x;
}
double _ntl_zlog(_ntl_verylong n)
{
static _ntl_verylong tmp = 0;
static double log_2;
static long init = 0;
long s;
long shamt;
long correction;
double x;
if (!init) {
log_2 = log(2.0);
init = 1;
}
if (_ntl_zsign(n) <= 0)
zhalt("log argument <= 0");
s = _ntl_z2log(n);
shamt = s - NTL_DOUBLE_PRECISION;
if (shamt <= 0)
return log(_ntl_zdoub_aux(n));
_ntl_zrshift(n, shamt, &tmp);
correction = _ntl_zround_correction(n, shamt, 0);
if (correction) _ntl_zsadd(tmp, correction, &tmp);
x = _ntl_zdoub_aux(tmp);
return log(x) + shamt*log_2;
}
void _ntl_zdoubtoz(double a, _ntl_verylong *xx)
{
_ntl_verylong x;
long neg, i, t, sz;
a = floor(a);
if (!_ntl_IsFinite(&a))
zhalt("_ntl_zdoubtoz: attempt to convert non-finite value");
if (a < 0) {
a = -a;
neg = 1;
}
else
neg = 0;
if (a == 0) {
_ntl_zzero(xx);
return;
}
sz = 1;
a = a*NTL_FRADIX_INV;
while (a >= 1) {
a = a*NTL_FRADIX_INV;
sz++;
}
x = *xx;
if (MustAlloc(x, sz)) {
_ntl_zsetlength(&x, sz);
*xx = x;
}
for (i = sz; i > 0; i--) {
a = a*NTL_FRADIX;
t = (long) a;
x[i] = t;
a = a - t;
}
x[0] = (neg ? -sz : sz);
}
void _ntl_zzero(_ntl_verylong *aa)
{
if (!(*aa)) _ntl_zsetlength(aa, 1);
(*aa)[0] = 1;
(*aa)[1] = 0;
}
/* same as _ntl_zzero, except does not unnecessarily allocate space */
void _ntl_zzero1(_ntl_verylong *aa)
{
if (!(*aa)) return;
(*aa)[0] = 1;
(*aa)[1] = 0;
}
void _ntl_zone(_ntl_verylong *aa)
{
if (!(*aa)) _ntl_zsetlength(aa, 1);
(*aa)[0] = 1;
(*aa)[1] = 1;
}
void _ntl_zcopy(_ntl_verylong a, _ntl_verylong *bb)
{
long i;
_ntl_verylong b = *bb;
if (!a) {
_ntl_zzero(bb);
return;
}
if (a != b) {
if ((i = *a) < 0)
i = (-i);
if (MustAlloc(b, i)) {
_ntl_zsetlength(&b, i);
*bb = b;
}
for (; i >= 0; i--)
*b++ = *a++;
}
}
/* same as _ntl_zcopy, but does not unnecessarily allocate space */
void _ntl_zcopy1(_ntl_verylong a, _ntl_verylong *bb)
{
long i;
_ntl_verylong b = *bb;
if (!a) {
_ntl_zzero1(bb);
return;
}
if (a != b) {
if ((i = *a) < 0)
i = (-i);
if (MustAlloc(b, i)) {
_ntl_zsetlength(&b, i);
*bb = b;
}
for (; i >= 0; i--)
*b++ = *a++;
}
}
void _ntl_zintoz(long d, _ntl_verylong *aa)
{
long i;
long anegative;
unsigned long d1, d2;
_ntl_verylong a = *aa;
anegative = 0;
if (d < 0) {
anegative = 1;
d1 = - ((unsigned long) d); /* careful: avoid overflow */
}
else
d1 = d;
i = 0;
d2 = d1;
do {
d2 >>= NTL_NBITS;
i++;
}
while (d2 > 0);
if (MustAlloc(a, i)) {
_ntl_zsetlength(&a, i);
*aa = a;
}
i = 0;
a[1] = 0;
while (d1 > 0) {
a[++i] = d1 & NTL_RADIXM;
d1 >>= NTL_NBITS;
}
if (i > 0)
a[0] = i;
else
a[0] = 1;
if (anegative)
a[0] = (-a[0]);
}
/* same as _ntl_zintoz, but does not unnecessarily allocate space */
void _ntl_zintoz1(long d, _ntl_verylong *aa)
{
long i;
long anegative;
unsigned long d1, d2;
_ntl_verylong a = *aa;
if (!d && !a) return;
anegative = 0;
if (d < 0) {
anegative = 1;
d1 = - ((unsigned long) d); /* careful: avoid overlow */
}
else
d1 = d;
i = 0;
d2 = d1;
do {
d2 >>= NTL_NBITS;
i++;
}
while (d2 > 0);
if (MustAlloc(a, i)) {
_ntl_zsetlength(&a, i);
*aa = a;
}
i = 0;
a[1] = 0;
while (d1 > 0) {
a[++i] = d1 & NTL_RADIXM;
d1 >>= NTL_NBITS;
}
if (i > 0)
a[0] = i;
else
a[0] = 1;
if (anegative)
a[0] = (-a[0]);
}
void _ntl_zuintoz(unsigned long d, _ntl_verylong *aa)
{
long i;
unsigned long d1, d2;
_ntl_verylong a = *aa;
d1 = d;
i = 0;
d2 = d1;
do {
d2 >>= NTL_NBITS;
i++;
}
while (d2 > 0);
if (MustAlloc(a, i)) {
_ntl_zsetlength(&a, i);
*aa = a;
}
i = 0;
a[1] = 0;
while (d1 > 0) {
a[++i] = d1 & NTL_RADIXM;
d1 >>= NTL_NBITS;
}
if (i > 0)
a[0] = i;
else
a[0] = 1;
}
unsigned long _ntl_ztouint(_ntl_verylong a)
{
unsigned long d;
long sa;
if (!a)
return (0);
if ((sa = *a) < 0)
sa = -sa;
d = (unsigned long) (*(a += sa));
while (--sa) {
d <<= NTL_NBITS;
d += (unsigned long) (*(--a));
}
if ((*(--a)) < 0)
return (-d);
return (d);
}
long _ntl_ztoint(_ntl_verylong a)
{
unsigned long res = _ntl_ztouint(a);
return NTL_ULONG_TO_LONG(res);
}
long _ntl_zcompare(_ntl_verylong a, _ntl_verylong b)
{
long sa;
long sb;
if (!a) {
if (!b)
return (0);
if (b[0] < 0)
return (1);
if (b[0] > 1)
return (-1);
if (b[1])
return (-1);
return (0);
}
if (!b) {
if (a[0] < 0)
return (-1);
if (a[0] > 1)
return (1);
if (a[1])
return (1);
return (0);
}
if ((sa = *a) > (sb = *b))
return (1);
if (sa < sb)
return (-1);
if (sa < 0)
sa = (-sa);
a += sa;
b += sa;
for (; sa; sa--) {
long diff = *a - *b;
if (diff > 0) {
if (sb < 0)
return (-1);
return (1);
}
if (diff < 0) {
if (sb < 0)
return (1);
return (-1);
}
a--;
b--;
}
return (0);
}
void _ntl_znegate(_ntl_verylong *aa)
{
_ntl_verylong a = *aa;
if (!a)
return;
if (a[1] || a[0] != 1)
a[0] = (-a[0]);
}
void _ntl_zsadd(_ntl_verylong a, long d, _ntl_verylong *b)
{
static _ntl_verylong x = 0;
_ntl_zintoz(d, &x);
_ntl_zadd(a, x, b);
}
void
_ntl_zadd(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc)
{
long sa;
long sb;
long anegative;
_ntl_verylong c;
long a_alias, b_alias;
if (!a) {
if (b)
_ntl_zcopy(b, cc);
else
_ntl_zzero(cc);
return;
}
if (!b) {
_ntl_zcopy(a, cc);
return;
}
c = *cc;
a_alias = (a == c);
b_alias = (b == c);
if ((anegative = ((sa = a[0]) < 0)) == ((sb = b[0]) < 0)) {
/* signs a and b are the same */
_ntl_verylong pc;
long carry;
long i;
long maxab;
if (anegative) {
sa = -sa;
sb = -sb;
}
if (sa < sb) {
i = sa;
maxab = sb;
}
else {
i = sb;
maxab = sa;
}
if (MustAlloc(c, maxab+1)) {
_ntl_zsetlength(&c, maxab + 1);
if (a_alias) a = c;
if (b_alias) b = c;
*cc = c;
}
pc = c;
carry = 0;
do {
long t = (*(++a)) + (*(++b)) + carry;
carry = t >> NTL_NBITS;
*(++pc) = t & NTL_RADIXM;
i--;
} while (i);
i = sa-sb;
if (!i) goto i_exit;
if (i < 0) {
i = -i;
a = b;
}
if (!carry) goto carry_exit;
for (;;) {
long t = (*(++a)) + 1;
carry = t >> NTL_NBITS;
*(++pc) = t & NTL_RADIXM;
i--;
if (!i) goto i_exit;
if (!carry) goto carry_exit;
}
i_exit:
if (carry) {
*(++pc) = 1;
maxab++;
}
*c = anegative ? -maxab : maxab;
return;
carry_exit:
if (pc != a) {
do {
*(++pc) = *(++a);
i--;
} while (i);
}
*c = anegative ? -maxab : maxab;
}
else {
/* signs a and b are different...use _ntl_zsub */
if (anegative) {
a[0] = -sa;
_ntl_zsub(b, a, cc);
if (!a_alias) a[0] = sa;
}
else {
b[0] = -sb;
_ntl_zsub(a, b, cc);
if (!b_alias) b[0] = sb;
}
}
}
void
_ntl_zsub(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc)
{
long sa;
long sb;
long anegative;
long a_alias, b_alias;
_ntl_verylong c;
if (!b) {
if (a)
_ntl_zcopy(a, cc);
else
_ntl_zzero(cc);
return;
}
if (!a) {
_ntl_zcopy(b, cc);
_ntl_znegate(cc);
return;
}
c = *cc;
a_alias = (a == c);
b_alias = (b == c);
if ((anegative = ((sa = a[0]) < 0)) == ((sb = b[0]) < 0)) {
/* signs agree */
long i, carry, *pc;
if (anegative) {
sa = -sa;
sb = -sb;
}
carry = sa - sb;
if (!carry) {
long *aa = a + sa;
long *bb = b + sa;
i = sa;
while (i && !(carry = (*aa - *bb))) {
aa--; bb--; i--;
}
}
if (!carry) {
_ntl_zzero(cc);
return;
}
if (carry < 0) {
{ long t = sa; sa = sb; sb = t; }
{ long t = a_alias; a_alias = b_alias; b_alias = t; }
{ long *t = a; a = b; b = t; }
anegative = !anegative;
}
if (MustAlloc(c, sa)) {
_ntl_zsetlength(&c, sa);
/* must have !a_alias */
if (b_alias) b = c;
*cc = c;
}
i = sb;
carry = 0;
pc = c;
do {
#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT))
long t = (*(++a)) - (*(++b)) - carry;
carry = (t < 0);
#else
long t = (*(++a)) - (*(++b)) + carry;
carry = t >> NTL_NBITS;
#endif
*(++pc) = t & NTL_RADIXM;
i--;
} while (i);
i = sa-sb;
while (carry) {
long t = (*(++a)) - 1;
#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT))
carry = (t < 0);
#else
carry = t >> NTL_NBITS;
#endif
*(++pc) = t & NTL_RADIXM;
i--;
}
if (i) {
if (pc != a) {
do {
*(++pc) = *(++a);
i--;
} while (i);
}
}
else {
while (sa > 1 && *pc == 0) { sa--; pc--; }
}
if (anegative) sa = -sa;
*c = sa;
}
else {
/* signs of a and b are different...use _ntl_zadd */
if (anegative) {
a[0] = -sa;
_ntl_zadd(a, b, cc);
if (!a_alias) a[0] = sa;
c = *cc;
c[0] = -c[0];
}
else {
b[0] = -sb;
_ntl_zadd(a, b, cc);
if (!b_alias) b[0] = sb;
}
}
}
void
_ntl_zsmul(_ntl_verylong a, long d, _ntl_verylong *bb)
{
long sa;
long anegative, bnegative;
_ntl_verylong b;
long a_alias;
if (d == 2) {
_ntl_z2mul(a, bb);
return;
}
if ((d >= NTL_RADIX) || (d <= -NTL_RADIX)) {
static _ntl_verylong x = 0;
_ntl_zintoz(d,&x);
_ntl_zmul(a, x, bb);
return;
}
if (!a || (a[0] == 1 && a[1] == 0)) {
_ntl_zzero(bb);
return;
}
if (!d) {
_ntl_zzero(bb);
return;
}
/* both inputs non-zero */
anegative = 0;
bnegative = 0;
if ((sa = a[0]) < 0) {
anegative = 1;
a[0] = sa = (-sa);
if (d < 0)
d = (-d);
else
bnegative = 1;
}
else if (bnegative = (d < 0))
d = (-d);
b = *bb;
a_alias = (a == b);
if (MustAlloc(b, sa + 1)) {
_ntl_zsetlength(&b, sa + 1);
if (a_alias) a = b;
*bb = b;
}
zsxmul(d, b+1, a);
sa++;
while ((sa > 1) && (!(b[sa])))
sa--;
b[0] = sa;
if (bnegative)
b[0] = (-b[0]);
if (anegative && !a_alias)
a[0] = -a[0];
}
void _ntl_zsubpos(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc)
{
long sa;
long sb;
long *c, *pc;
long i, carry;
long b_alias;
if (!b) {
if (a)
_ntl_zcopy(a, cc);
else
_ntl_zzero(cc);
return;
}
if (!a) {
_ntl_zzero(cc);
return;
}
sa = a[0];
sb = b[0];
c = *cc;
b_alias = (b == c);
if (MustAlloc(c, sa)) {
_ntl_zsetlength(&c, sa);
if (b_alias) b = c;
*cc = c;
}
i = sb;
carry = 0;
pc = c;
while (i) {
#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT))
long t = (*(++a)) - (*(++b)) - carry;
carry = (t < 0);
#else
long t = (*(++a)) - (*(++b)) + carry;
carry = t >> NTL_NBITS;
#endif
*(++pc) = t & NTL_RADIXM;
i--;
}
i = sa-sb;
while (carry) {
long t = (*(++a)) - 1;
#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT))
carry = (t < 0);
#else
carry = t >> NTL_NBITS;
#endif
*(++pc) = t & NTL_RADIXM;
i--;
}
if (i) {
if (pc != a) {
do {
*(++pc) = *(++a);
i--;
} while (i);
}
}
else {
while (sa > 1 && *pc == 0) { sa--; pc--; }
}
*c = sa;
}
static long *kmem = 0; /* globals for Karatsuba */
static long max_kmem = 0;
/* These cross-over points were estimated using
a Sparc-10, a Sparc-20, and a Pentium-90. */
#if (defined(NTL_SINGLE_MUL))
#define KARX (18)
#else
#define KARX (16)
#endif
/* Auxilliary routines for Karatsuba */
static
void kar_fold(long *T, long *b, long hsa)
{
long sb, *p2, *p3, i, carry;
sb = *b;
p2 = b + hsa;
p3 = T;
carry = 0;
for (i = sb-hsa; i>0; i--) {
long t = (*(++b)) + (*(++p2)) + carry;
carry = t >> NTL_NBITS;
*(++p3) = t & NTL_RADIXM;
}
for (i = (hsa << 1) - sb; i>0; i--) {
long t = (*(++b)) + carry;
carry = t >> NTL_NBITS;
*(++p3) = t & NTL_RADIXM;
}
if (carry) {
*(++p3) = carry;
*T = hsa + 1;
}
else
*T = hsa;
}
static
void kar_sub(long *T, long *c)
{
long i, carry;
i = *c;
carry = 0;
while (i>0) {
#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT))
long t = (*(++T)) - (*(++c)) - carry;
carry = (t < 0);
#else
long t = (*(++T)) - (*(++c)) + carry;
carry = t >> NTL_NBITS;
#endif
*T = t & NTL_RADIXM;
i--;
}
while (carry) {
long t = (*(++T)) - 1;
#if (!NTL_ARITH_RIGHT_SHIFT || defined(NTL_CLEAN_INT))
carry = (t < 0);
#else
carry = t >> NTL_NBITS;
#endif
*T = t & NTL_RADIXM;
}
}
static
void kar_add(long *c, long *T, long hsa)
{
long i, carry;
c += hsa;
i = *T;
while (T[i] == 0 && i > 0) i--;
carry = 0;
while (i>0) {
long t = (*(++c)) + (*(++T)) + carry;
carry = t >> NTL_NBITS;
*c = t & NTL_RADIXM;
i--;
}
while (carry) {
long t = (*(++c)) + 1;
carry = t >> NTL_NBITS;
*c = t & NTL_RADIXM;
}
}
static
void kar_fix(long *c, long *T, long hsa)
{
long i, carry, s;
s = *T;
i = hsa;
while (i>0) {
*(++c) = *(++T);
i--;
}
i = s - hsa;
carry = 0;
while (i > 0) {
long t = (*(++c)) + (*(++T)) + carry;
carry = t >> NTL_NBITS;
*c = t & NTL_RADIXM;
i--;
}
while (carry) {
long t = (*(++c)) + 1;
carry = t >> NTL_NBITS;
*c = t & NTL_RADIXM;
}
}
static
void kar_mul(long *c, long *a, long *b, long *stk)
{
long sa, sb, sc;
if (*a < *b) { long *t = a; a = b; b = t; }
sa = *a;
sb = *b;
sc = sa + sb;
if (sb < KARX) {
/* classic algorithm */
long *pc, i, *pb;
pc = c;
for (i = sc; i; i--) {
pc++;
*pc = 0;
}
pc = c;
pb = b;
for (i = sb; i; i--) {
pb++;
pc++;
zaddmul(*pb, pc, a);
}
}
else {
long hsa = (sa + 1) >> 1;
if (hsa < sb) {
/* normal case */
long *T1, *T2, *T3;
/* allocate space */
T1 = stk; stk += hsa + 2;
T2 = stk; stk += hsa + 2;
T3 = stk; stk += (hsa << 1) + 3;
if (stk-kmem > max_kmem) zhalt("internal error: kmem overflow");
/* compute T1 = a_lo + a_hi */
kar_fold(T1, a, hsa);
/* compute T2 = b_lo + b_hi */
kar_fold(T2, b, hsa);
/* recursively compute T3 = T1 * T2 */
kar_mul(T3, T1, T2, stk);
/* recursively compute a_hi * b_hi into high part of c */
/* and subtract from T3 */
{
long olda, oldb;
olda = a[hsa]; a[hsa] = sa-hsa;
oldb = b[hsa]; b[hsa] = sb-hsa;
kar_mul(c + (hsa << 1), a + hsa, b + hsa, stk);
kar_sub(T3, c + (hsa << 1));
a[hsa] = olda;
b[hsa] = oldb;
}
/* recursively compute a_lo*b_lo into low part of c */
/* and subtract from T3 */
*a = hsa;
*b = hsa;
kar_mul(c, a, b, stk);
kar_sub(T3, c);
*a = sa;
*b = sb;
/* finally, add T3 * NTL_RADIX^{hsa} to c */
kar_add(c, T3, hsa);
}
else {
/* degenerate case */
long *T;
T = stk; stk += sb + hsa + 1;
if (stk-kmem > max_kmem) zhalt("internal error: kmem overflow");
/* recursively compute b*a_hi into high part of c */
{
long olda;
olda = a[hsa]; a[hsa] = sa-hsa;
kar_mul(c + hsa, a + hsa, b, stk);
a[hsa] = olda;
}
/* recursively compute b*a_lo into T */
*a = hsa;
kar_mul(T, a, b, stk);
*a = sa;
/* fix-up result */
kar_fix(c, T, hsa);
}
}
/* normalize result */
while (c[sc] == 0 && sc > 1) sc--;
*c = sc;
}
#if (defined(NTL_SINGLE_MUL))
#define KARSX (36)
#else
#define KARSX (32)
#endif
static
void kar_sq(long *c, long *a, long *stk)
{
long sa, sc;
sa = *a;
sc = sa << 1;
if (sa < KARSX) {
/* classic algorithm */
long carry, i, j, *pc;
pc = c;
for (i = sc; i; i--) {
pc++;
*pc = 0;
}
carry = 0;
i = 0;
for (j = 1; j <= sa; j++) {
unsigned long uncar;
long t;
i += 2;
uncar = ((unsigned long) carry) + (((unsigned long) c[i-1]) << 1);
t = uncar & NTL_RADIXM;
zaddmulpsq(t, a[j], carry);
c[i-1] = t;
zaddmulsq(sa-j, c+i, a+j);
uncar = (uncar >> NTL_NBITS) + (((unsigned long) c[i]) << 1);
uncar += ((unsigned long) carry);
carry = uncar >> NTL_NBITS;
c[i] = uncar & NTL_RADIXM;
}
}
else {
long hsa = (sa + 1) >> 1;
long *T1, *T2, olda;
T1 = stk; stk += hsa + 2;
T2 = stk; stk += (hsa << 1) + 3;
if (stk-kmem > max_kmem) zhalt("internal error: kmem overflow");
kar_fold(T1, a, hsa);
kar_sq(T2, T1, stk);
olda = a[hsa]; a[hsa] = sa - hsa;
kar_sq(c + (hsa << 1), a + hsa, stk);
kar_sub(T2, c + (hsa << 1));
a[hsa] = olda;
*a = hsa;
kar_sq(c, a, stk);
kar_sub(T2, c);
*a = sa;
kar_add(c, T2, hsa);
}
while (c[sc] == 0 && sc > 1) sc--;
*c = sc;
}
void _ntl_zmul(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *cc)
{
static _ntl_verylong mem = 0;
_ntl_verylong c = *cc;
if (!a || (a[0] == 1 && a[1] == 0) || !b || (b[0] == 1 && b[1] == 0)) {
_ntl_zzero(cc);
return;
}
if (a == b) {
if (a == c) {
_ntl_zcopy(a, &mem);
a = mem;
}
_ntl_zsq(a, cc);
}
else {
long aneg, bneg, sa, sb, sc;
if (a == c) {
_ntl_zcopy(a, &mem);
a = mem;
}
else if (b == c) {
_ntl_zcopy(b, &mem);
b = mem;
}
sa = *a;
if (sa < 0) {
*a = sa = -sa;
aneg = 1;
}
else
aneg = 0;
sb = *b;
if (*b < 0) {
*b = sb = -sb;
bneg = 1;
}
else
bneg = 0;
sc = sa + sb;
if (MustAlloc(c, sc)) {
_ntl_zsetlength(&c, sc);
*cc = c;
}
/* we optimize for *very* small numbers,
* avoiding all function calls and loops */
if (sa <= 3 && sb <= 3) {
long carry, d;
switch (sa) {
case 1:
switch (sb) {
case 1:
carry = 0;
zxmulp(c[1], a[1], b[1], carry);
c[2] = carry;
break;
case 2:
carry = 0;
d = a[1];
zxmulp(c[1], b[1], d, carry);
zxmulp(c[2], b[2], d, carry);
c[3] = carry;
break;
case 3:
carry = 0;
d = a[1];
zxmulp(c[1], b[1], d, carry);
zxmulp(c[2], b[2], d, carry);
zxmulp(c[3], b[3], d, carry);
c[4] = carry;
break;
}
break;
case 2:
switch (sb) {
case 1:
carry = 0;
d = b[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
c[3] = carry;
break;
case 2:
carry = 0;
d = b[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
c[3] = carry;
carry = 0;
d = b[2];
zaddmulp(c[2], a[1], d, carry);
zaddmulp(c[3], a[2], d, carry);
c[4] = carry;
break;
case 3:
carry = 0;
d = a[1];
zxmulp(c[1], b[1], d, carry);
zxmulp(c[2], b[2], d, carry);
zxmulp(c[3], b[3], d, carry);
c[4] = carry;
carry = 0;
d = a[2];
zaddmulp(c[2], b[1], d, carry);
zaddmulp(c[3], b[2], d, carry);
zaddmulp(c[4], b[3], d, carry);
c[5] = carry;
break;
}
break;
case 3:
switch (sb) {
case 1:
carry = 0;
d = b[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
zxmulp(c[3], a[3], d, carry);
c[4] = carry;
break;
case 2:
carry = 0;
d = b[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
zxmulp(c[3], a[3], d, carry);
c[4] = carry;
carry = 0;
d = b[2];
zaddmulp(c[2], a[1], d, carry);
zaddmulp(c[3], a[2], d, carry);
zaddmulp(c[4], a[3], d, carry);
c[5] = carry;
break;
case 3:
carry = 0;
d = b[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
zxmulp(c[3], a[3], d, carry);
c[4] = carry;
carry = 0;
d = b[2];
zaddmulp(c[2], a[1], d, carry);
zaddmulp(c[3], a[2], d, carry);
zaddmulp(c[4], a[3], d, carry);
c[5] = carry;
carry = 0;
d = b[3];
zaddmulp(c[3], a[1], d, carry);
zaddmulp(c[4], a[2], d, carry);
zaddmulp(c[5], a[3], d, carry);
c[6] = carry;
break;
}
}
if (c[sc] == 0) sc--;
if (aneg != bneg) sc = -sc;
*c = sc;
if (aneg) *a = -sa;
if (bneg) *b = -sb;
return;
}
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && sa >= NTL_GMP_MUL_CROSS && sb >= NTL_GMP_MUL_CROSS) {
long gsa = L_TO_G(sa);
long gsb = L_TO_G(sb);
mp_limb_t *a_p, *b_p, *c_p, *end_p;
NTL_GSPACE((gsa + gsb) << 1);
a_p = gspace_data;
b_p = a_p + gsa;
c_p = b_p + gsb;
end_p = c_p + (gsa + gsb);
lip_to_gmp(a+1, a_p, sa);
lip_to_gmp(b+1, b_p, sb);
if (!a_p[gsa-1]) gsa--;
if (!b_p[gsb-1]) gsb--;
end_p[-1] = 0;
end_p[-2] = 0;
if (gsa >= gsb)
mpn_mul(c_p, a_p, gsa, b_p, gsb);
else
mpn_mul(c_p, b_p, gsb, a_p, gsa);
gmp_to_lip(c+1, c_p, sc);
if (!c[sc]) sc--;
if (aneg != bneg) sc = -sc;
c[0] = sc;
if (aneg) *a = - *a;
if (bneg) *b = - *b;
return;
}
#endif
if (*a < KARX || *b < KARX) {
/* classic algorithm */
long i, *pc;
pc = c;
for (i = sc; i; i--) {
pc++;
*pc = 0;
}
pc = c;
if (*a >= *b) {
long *pb = b;
for (i = *pb; i; i--) {
pb++;
pc++;
zaddmul(*pb, pc, a);
}
}
else {
long *pa = a;
for (i = *pa; i; i--) {
pa++;
pc++;
zaddmul(*pa, pc, b);
}
}
while (c[sc] == 0 && sc > 1) sc--;
if (aneg != bneg) sc = -sc;
c[0] = sc;
if (aneg) *a = - *a;
if (bneg) *b = - *b;
}
else {
/* karatsuba */
long n, hn, sp;
if (*a < *b)
n = *b;
else
n = *a;
sp = 0;
do {
hn = (n + 1) >> 1;
sp += (hn << 2) + 7;
n = hn+1;
} while (n >= KARX);
if (sp > max_kmem) {
if (max_kmem == 0)
kmem = (long *) NTL_MALLOC(sp, sizeof(long), 0);
else
kmem = (long *) NTL_REALLOC(kmem, sp, sizeof(long), 0);
max_kmem = sp;
if (!kmem) zhalt("out of memory in karatsuba");
}
kar_mul(c, a, b, kmem);
if (aneg != bneg) *c = - *c;
if (aneg) *a = - *a;
if (bneg) *b = - *b;
}
}
}
void _ntl_zsq(_ntl_verylong a, _ntl_verylong *cc)
{
static _ntl_verylong mem = 0;
_ntl_verylong c = *cc;
long sa, aneg, sc;
if (!a || (a[0] == 1 && a[1] == 0)) {
_ntl_zzero(cc);
return;
}
if (a == c) {
_ntl_zcopy(a, &mem);
a = mem;
}
sa = *a;
if (*a < 0) {
*a = sa = -sa;
aneg = 1;
}
else
aneg = 0;
sc = (sa) << 1;
if (MustAlloc(c, sc)) {
_ntl_zsetlength(&c, sc);
*cc = c;
}
if (sa <= 3) {
long carry, d;
switch (sa) {
case 1:
carry = 0;
zxmulp(c[1], a[1], a[1], carry);
c[2] = carry;
break;
case 2:
carry = 0;
d = a[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
c[3] = carry;
carry = 0;
d = a[2];
zaddmulp(c[2], a[1], d, carry);
zaddmulp(c[3], a[2], d, carry);
c[4] = carry;
break;
case 3:
carry = 0;
d = a[1];
zxmulp(c[1], a[1], d, carry);
zxmulp(c[2], a[2], d, carry);
zxmulp(c[3], a[3], d, carry);
c[4] = carry;
carry = 0;
d = a[2];
zaddmulp(c[2], a[1], d, carry);
zaddmulp(c[3], a[2], d, carry);
zaddmulp(c[4], a[3], d, carry);
c[5] = carry;
carry = 0;
d = a[3];
zaddmulp(c[3], a[1], d, carry);
zaddmulp(c[4], a[2], d, carry);
zaddmulp(c[5], a[3], d, carry);
c[6] = carry;
break;
}
if (c[sc] == 0) sc--;
*c = sc;
if (aneg) *a = -sa;
return;
}
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && sa >= NTL_GMP_SQR_CROSS) {
long gsa = L_TO_G(sa);
mp_limb_t *a_p, *c_p, *end_p;;
NTL_GSPACE(gsa + gsa + gsa);
a_p = gspace_data;
c_p = a_p + gsa;
end_p = c_p + (gsa + gsa);
lip_to_gmp(a+1, a_p, sa);
if (!a_p[gsa-1]) gsa--;
end_p[-1] = 0;
end_p[-2] = 0;
mpn_mul(c_p, a_p, gsa, a_p, gsa);
gmp_to_lip(c+1, c_p, sc);
if (!c[sc]) sc--;
c[0] = sc;
if (aneg) *a = - *a;
return;
}
#endif
if (sa < KARSX) {
/* classic algorithm */
long carry, i, j, *pc;
pc = c;
for (i = sc; i; i--) {
pc++;
*pc = 0;
}
carry = 0;
i = 0;
for (j = 1; j <= sa; j++) {
unsigned long uncar;
long t;
i += 2;
uncar = ((unsigned long) carry) + (((unsigned long) c[i-1]) << 1);
t = uncar & NTL_RADIXM;
zaddmulpsq(t, a[j], carry);
c[i-1] = t;
zaddmulsq(sa-j, c+i, a+j);
uncar = (uncar >> NTL_NBITS) + (((unsigned long) c[i]) << 1);
uncar += ((unsigned long) carry);
carry = uncar >> NTL_NBITS;
c[i] = uncar & NTL_RADIXM;
}
while (c[sc] == 0 && sc > 1) sc--;
c[0] = sc;
if (aneg) *a = - *a;
}
else {
/* karatsuba */
long n, hn, sp;
n = *a;
sp = 0;
do {
hn = (n + 1) >> 1;
sp += hn + hn + hn + 5;
n = hn+1;
} while (n >= KARSX);
if (sp > max_kmem) {
if (max_kmem == 0)
kmem = (long *) NTL_MALLOC(sp, sizeof(long), 0);
else
kmem = (long *) NTL_REALLOC(kmem, sp, sizeof(long), 0);
max_kmem = sp;
if (!kmem) zhalt("out of memory in karatsuba");
}
kar_sq(c, a, kmem);
if (aneg) *a = - *a;
}
}
long _ntl_zsdiv(_ntl_verylong a, long d, _ntl_verylong *bb)
{
long sa;
_ntl_verylong b = *bb;
if (!d) {
zhalt("division by zero in _ntl_zsdiv");
}
if (!a) {
_ntl_zzero(bb);
return (0);
}
if (d == 2) {
long is_odd = a[1] & 1;
long fix = (a[0] < 0) & is_odd;
_ntl_zrshift(a, 1, bb);
if (fix) _ntl_zsadd(*bb, -1, bb);
return is_odd;
}
if ((sa = a[0]) < 0)
sa = (-sa);
/* if b aliases a, then b won't move */
_ntl_zsetlength(&b, sa);
*bb = b;
if ((d >= NTL_RADIX) || (d <= -NTL_RADIX)) {
static _ntl_verylong zd = 0;
static _ntl_verylong zb = 0;
_ntl_zintoz(d, &zb);
_ntl_zdiv(a, zb, &b, &zd);
*bb = b;
return (_ntl_ztoint(zd));
}
else {
long den = d;
double deninv;
long carry = 0;
long i;
long flag = (*a < 0 ? 2 : 0) | (den < 0 ? 1 : 0);
if (den < 0)
den = -den;
deninv = 1.0 / ((double) den);
if (a[sa] < den && sa > 1)
carry = a[sa--];
for (i = sa; i; i--) {
zdiv21(carry, a[i], den, deninv, b[i]);
}
while ((sa > 1) && (!(b[sa])))
sa--;
b[0] = sa;
if (flag) {
if (flag <= 2) {
if (!carry)
_ntl_znegate(&b);
else {
_ntl_zsadd(b, 1, &b);
b[0] = -b[0];
if (flag == 1)
carry = carry - den;
else
carry = den - carry;
*bb = b;
}
}
else
carry = -carry;
}
return (carry);
}
}
long _ntl_zsmod(_ntl_verylong a, long d)
{
long sa;
if (!a) {
return (0);
}
if (d == 2) return (a[1] & 1);
if (!d) {
zhalt("division by zero in _ntl_zsdiv");
}
if ((sa = a[0]) < 0)
sa = (-sa);
if ((d >= NTL_RADIX) || (d <= -NTL_RADIX)) {
static _ntl_verylong zd = 0;
static _ntl_verylong zb = 0;
_ntl_zintoz(d, &zb);
_ntl_zmod(a, zb, &zd);
return (_ntl_ztoint(zd));
}
else {
long den = d;
double deninv;
long carry = 0;
long i;
long flag = (*a < 0 ? 2 : 0) | (den < 0 ? 1 : 0);
if (den < 0)
den = -den;
deninv = 1.0 / ((double) den);
if (a[sa] < den && sa > 1)
carry = a[sa--];
for (i = sa; i; i--) {
zrem21(carry, a[i], den, deninv);
}
if (flag) {
if (flag <= 2) {
if (carry) {
if (flag == 1)
carry = carry - den;
else
carry = den - carry;
}
}
else
carry = -carry;
}
return (carry);
}
}
void _ntl_zmultirem(_ntl_verylong a, long n, long* dd, long *rr)
{
long j;
long sa;
if (!a || (a[0] == 1 && a[1] == 0)) {
for (j = 0; j < n; j++) rr[j] = 0;
return;
}
sa = a[0];
for (j = 0; j < n; j++) {
long den = dd[j];
double deninv;
long carry = 0;
long i;
long lsa = sa;
deninv = 1.0 / ((double) den);
if (a[lsa] < den && lsa > 1)
carry = a[lsa--];
for (i = lsa; i; i--) {
zrem21(carry, a[i], den, deninv);
}
rr[j] = carry;
}
}
#if (defined(NTL_SINGLE_MUL))
#define REDUCE_CROSS 8
void _ntl_zmultirem2(_ntl_verylong a, long n, long* dd, double **ttbl, long *rr)
{
long d;
double *tbl;
long ac0, ac1, ac2;
long *ap;
double *tp;
long sa, i, j, k;
unsigned long pc0, pc1, lo_wd, hi_wd;
long carry;
double xx, yy, dinv;
if (!a || a[0] < REDUCE_CROSS || a[0] >= NTL_RADIX) {
_ntl_zmultirem(a, n, dd, rr);
return;
}
sa = a[0];
for (i = 0; i < n; i++) {
d = dd[i];
tbl = ttbl[i];
ac0 = a[1];
ac1 = 0;
ac2 = 0;
ap = &a[2];
tp = &tbl[1];
k = sa-1;
while (k) {
j = k;
if (j > 64) j = 64;
k -= j;
pc0 = 0;
pc1 = 0;
xx = ((double) (*ap++))* (*tp++) + DENORMALIZE;
for (; j > 1; j--) {
yy = ((double) (*ap++))*(*tp++)
+ DENORMALIZE;
NTL_FetchHiLo(hi_wd, lo_wd, xx);
pc0 += lo_wd & 0x3FFFFFF;
pc1 += ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
xx = yy;
}
NTL_FetchHiLo(hi_wd, lo_wd, xx);
pc0 += lo_wd & 0x3FFFFFF;
pc1 += ((hi_wd<<6)|(lo_wd>>26)) & 0x3FFFFFF;
pc1 += (pc0 >> 26);
ac0 += (pc0 & 0x3FFFFFF);
if (ac0 > NTL_RADIX) {
ac0 -= NTL_RADIX;
carry = 1;
}
else
carry = 0;
ac1 += carry + (pc1 & 0x3FFFFFF);
if (ac1 > NTL_RADIX) {
ac1 -= NTL_RADIX;
carry = 1;
}
else
carry = 0;
ac2 += carry + (pc1 >> 26);
}
carry = 0;
dinv = ((double) 1)/((double) d);
if (ac2 >= d) {
zrem21(carry, ac2, d, dinv);
}
else
carry = ac2;
zrem21(carry, ac1, d, dinv);
zrem21(carry, ac0, d, dinv);
rr[i] = carry;
}
}
#elif (defined(NTL_TBL_REM))
#if (defined(NTL_LONG_LONG))
/* This version uses the double-word long type directly.
* It's a little faster that the other one.
* It accumlates 8 double-word products before stepping
* a higher-level accumulator.
*/
void _ntl_zmultirem3(_ntl_verylong a, long n, long* dd, long **ttbl, long *rr)
{
long sa, i, j, d, *tbl, ac0, ac1, ac2, *ap, *tp, k, carry;
double dinv;
NTL_LL_TYPE acc;
if (!a || a[0] < 8 || a[0] >= NTL_RADIX) {
_ntl_zmultirem(a, n, dd, rr);
return;
}
sa = a[0];
for (i = 0; i < n; i++) {
d = dd[i];
tbl = ttbl[i];
acc = a[1];
ac2 = 0;
ap = &a[2];
tp = &tbl[1];
k = sa - 7;
for (j = 0; j < k; j += 7) {
acc += ((NTL_LL_TYPE) ap[j+0]) * ((NTL_LL_TYPE) tp[j+0]);
acc += ((NTL_LL_TYPE) ap[j+1]) * ((NTL_LL_TYPE) tp[j+1]);
acc += ((NTL_LL_TYPE) ap[j+2]) * ((NTL_LL_TYPE) tp[j+2]);
acc += ((NTL_LL_TYPE) ap[j+3]) * ((NTL_LL_TYPE) tp[j+3]);
acc += ((NTL_LL_TYPE) ap[j+4]) * ((NTL_LL_TYPE) tp[j+4]);
acc += ((NTL_LL_TYPE) ap[j+5]) * ((NTL_LL_TYPE) tp[j+5]);
acc += ((NTL_LL_TYPE) ap[j+6]) * ((NTL_LL_TYPE) tp[j+6]);
ac2 += (long) (acc >> (2*NTL_NBITS));
acc &= (((NTL_LL_TYPE) 1) << (2*NTL_NBITS)) - ((NTL_LL_TYPE) 1);
}
k = sa - 1;
for (; j < k; j++)
acc += ((NTL_LL_TYPE) ap[j+0]) * ((NTL_LL_TYPE) tp[j+0]);
ac2 += (long) (acc >> (2*NTL_NBITS));
acc &= (((NTL_LL_TYPE) 1) << (2*NTL_NBITS)) - ((NTL_LL_TYPE) 1);
ac0 = (long) (acc & ( (((NTL_LL_TYPE) 1) << (NTL_NBITS)) - ((NTL_LL_TYPE) 1) ));
ac1 = (long) (acc >> NTL_NBITS);
carry = 0;
dinv = ((double) 1)/((double) d);
if (ac2 >= d) {
zrem21(carry, ac2, d, dinv);
}
else
carry = ac2;
zrem21(carry, ac1, d, dinv);
zrem21(carry, ac0, d, dinv);
rr[i] = carry;
}
}
#else
void _ntl_zmultirem3(_ntl_verylong a, long n, long* dd, long **ttbl, long *rr)
{
long sa, i, d, *tbl, ac0, ac1, ac2, *ap, *tp, k, t, carry;
double dinv;
if (!a || a[0] < 8 || a[0] >= NTL_RADIX) {
_ntl_zmultirem(a, n, dd, rr);
return;
}
sa = a[0];
for (i = 0; i < n; i++) {
d = dd[i];
tbl = ttbl[i];
ac0 = a[1];
ac1 = 0;
ac2 = 0;
ap = &a[2];
tp = &tbl[1];
k = sa-1;
while (k) {
zxmulp(t, *ap, *tp, ac0);
ac1 += ac0;
ac2 += ac1 >> NTL_NBITS;
ac1 &= NTL_RADIXM;
ac0 = t;
k--;
ap++;
tp++;
}
carry = 0;
dinv = ((double) 1)/((double) d);
if (ac2 >= d) {
zrem21(carry, ac2, d, dinv);
}
else
carry = ac2;
zrem21(carry, ac1, d, dinv);
zrem21(carry, ac0, d, dinv);
rr[i] = carry;
}
}
#endif
#endif
long _ntl_zsfastrem(_ntl_verylong a, long d)
/* assumes a >= 0, and 0 < d < NTL_RADIX */
/* computes a % d */
{
long sa;
if (!a || (a[0] == 1 && a[1] == 0)) {
return 0;
}
sa = a[0];
{
long den = d;
double deninv = ((double)1)/((double)den);
long carry = 0;
long i;
long lsa = sa;
if (a[lsa] < den && lsa > 1)
carry = a[lsa--];
for (i = lsa; i; i--) {
zrem21(carry, a[i], den, deninv);
}
return carry;
}
}
void _ntl_zdiv(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *qq, _ntl_verylong *rr)
{
long sa, sb, sq, i;
long sign;
long q1;
long *rp;
double btopinv, aux;
static _ntl_verylong q=0, r=0;
if (!b || (((sb=b[0]) == 1) && (!b[1]))) {
zhalt("division by zero in _ntl_zdiv");
}
if (!a || (((sa=a[0]) == 1) && (!a[1]))) {
_ntl_zzero(qq);
if (rr) _ntl_zzero(rr);
return;
}
if (sb == 1) {
long t1 = _ntl_zsdiv(a, b[1], qq);
if (rr) _ntl_zintoz(t1, rr);
return;
}
if (sb == -1) {
long t1 = _ntl_zsdiv(a, -b[1], qq);
if (rr) _ntl_zintoz(t1, rr);
return;
}
sign = 0;
if (sa < 0) {
a[0] = sa = -sa;
sign = 2;
}
if (sb < 0) {
b[0] = sb = -sb;
sign |= 1;
}
sq = sa-sb+1;
if (sq <= 0) {
_ntl_zcopy(a, &r);
_ntl_zzero(&q);
goto done;
}
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && sb >= NTL_GMP_DIV_CROSS && sq >= NTL_GMP_DIV_CROSS) {
GT_INIT
lip_to_mpz(a, gt_1);
lip_to_mpz(b, gt_2);
mpz_fdiv_qr(gt_3, gt_4, gt_1, gt_2);
mpz_to_lip(&q, gt_3);
mpz_to_lip(&r, gt_4);
goto done;
}
#endif
_ntl_zsetlength(&q, sq);
_ntl_zsetlength(&r, sa+1);
_ntl_zcopy(a, &r);
rp = &r[sa+1];
*rp = 0;
r[0] = 0; /* this streamlines the last evaluation of aux */
btopinv = b[sb]*NTL_FRADIX + b[sb-1];
if (sb > 2)
btopinv = NTL_FRADIX / (btopinv*NTL_FRADIX + b[sb-2]);
else
btopinv = 1.0 / btopinv;
aux = btopinv*(rp[-1]*NTL_FRADIX + rp[-2]);
if (aux >= NTL_FRADIX)
aux = NTL_FRADIX-1;
for (i = sq; i >= 1; i--, rp--) {
q1 = (long) aux;
if (q1) {
zsubmul(q1, &r[i], b);
}
while (rp[0] < 0) {
zaddmulone(&r[i], b);
q1--;
}
while (rp[0] > 0) {
zsubmulone(&r[i], b);
q1++;
}
aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]);
while (aux > NTL_FRADIX - 16) {
/* q1 might be too small */
if (aux >= NTL_FRADIX)
aux = NTL_FRADIX-1;
zsubmulone(&r[i], b);
if (rp[0] < 0) {
/* oops...false alarm! */
zaddmulone(&r[i], b);
break;
}
else {
q1++;
aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]);
}
}
q[i] = q1;
}
while (sq > 1 && q[sq] == 0) sq--;
q[0] = sq;
i = sb;
while (i > 1 && r[i] == 0) i--;
r[0] = i;
done:
if (sign)
{
if (sign <= 2)
{
if (!(r[1]) && (r[0] == 1))
_ntl_znegate(&q);
else
{
_ntl_zsadd(q, 1, &q);
_ntl_znegate(&q);
if (sign == 1)
_ntl_zsub(r, b, &r);
else
_ntl_zsub(b, r, &r);
}
}
else
_ntl_znegate(&r);
if (sign & 2)
a[0] = -sa;
if (sign & 1)
b[0] = -sb;
}
_ntl_zcopy(q, qq);
if (rr) _ntl_zcopy(r, rr);
}
void
_ntl_zmod(_ntl_verylong a, _ntl_verylong b, _ntl_verylong *rr)
{
long sa, sb, sq, i;
long sign;
long q1;
long *rp;
double btopinv, aux;
static _ntl_verylong r=0;
if (!b || (((sb=b[0]) == 1) && (!b[1]))) {
zhalt("division by zero in _ntl_zdiv");
}
if (!a || (((sa=a[0]) == 1) && (!a[1]))) {
_ntl_zzero(rr);
return;
}
if (sb == 1) {
_ntl_zintoz(_ntl_zsmod(a, b[1]), rr);
return;
}
if (sb == -1) {
_ntl_zintoz(_ntl_zsmod(a, -b[1]), rr);
return;
}
sign = 0;
if (sa < 0) {
a[0] = sa = -sa;
sign = 2;
}
if (sb < 0) {
b[0] = sb = -sb;
sign |= 1;
}
sq = sa-sb+1;
if (sq <= 0) {
_ntl_zcopy(a, &r);
goto done;
}
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && sb >= NTL_GMP_REM_CROSS && sq >= NTL_GMP_REM_CROSS) {
GT_INIT
lip_to_mpz(a, gt_1);
lip_to_mpz(b, gt_2);
mpz_fdiv_r(gt_4, gt_1, gt_2);
mpz_to_lip(&r, gt_4);
goto done;
}
#endif
_ntl_zsetlength(&r, sa+1);
_ntl_zcopy(a, &r);
rp = &r[sa+1];
*rp = 0;
r[0] = 0; /* this streamlines the last evaluation of aux */
btopinv = b[sb]*NTL_FRADIX + b[sb-1];
if (sb > 2)
btopinv = NTL_FRADIX / (btopinv*NTL_FRADIX + b[sb-2]);
else
btopinv = 1.0 / btopinv;
aux = btopinv*(rp[-1]*NTL_FRADIX + rp[-2]);
if (aux >= NTL_FRADIX)
aux = NTL_FRADIX-1;
for (i = sq; i >= 1; i--, rp--) {
q1 = (long) aux;
if (q1) {
zsubmul(q1, &r[i], b);
}
while (rp[0] < 0) {
zaddmulone(&r[i], b);
}
while (rp[0] > 0) {
zsubmulone(&r[i], b);
}
aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]);
while (aux > NTL_FRADIX - 16) {
/* q1 might be too small */
if (aux >= NTL_FRADIX)
aux = NTL_FRADIX-1;
zsubmulone(&r[i], b);
if (rp[0] < 0) {
/* oops...false alarm! */
zaddmulone(&r[i], b);
break;
}
else {
aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]);
}
}
}
i = sb;
while (i > 1 && r[i] == 0) i--;
r[0] = i;
done:
if (sign)
{
if (sign <= 2)
{
if (!(r[1]) && (r[0] == 1))
/* no op */;
else
{
if (sign == 1)
_ntl_zsub(r, b, &r);
else
_ntl_zsub(b, r, &r);
}
}
else
_ntl_znegate(&r);
if (sign & 2)
a[0] = -sa;
if (sign & 1)
b[0] = -sb;
}
_ntl_zcopy(r, rr);
}
void
_ntl_zquickmod(_ntl_verylong *rr, _ntl_verylong b)
{
long sa, sb, sq, i;
long q1;
long *rp;
double btopinv, aux;
_ntl_verylong r;
sb = b[0];
r = *rr;
if (!r || (((sa=r[0]) == 1) && (!r[1]))) {
_ntl_zzero(rr);
return;
}
if (sb == 1) {
_ntl_zintoz(_ntl_zsmod(r, b[1]), rr);
return;
}
sq = sa-sb+1;
if (sq <= 0) {
return;
}
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && sb >= NTL_GMP_QREM_CROSS && sq >= NTL_GMP_QREM_CROSS) {
GT_INIT
lip_to_mpz(r, gt_1);
lip_to_mpz(b, gt_2);
mpz_fdiv_r(gt_4, gt_1, gt_2);
mpz_to_lip(rr, gt_4);
return;
}
#endif
_ntl_zsetlength(rr, sa+1);
r = *rr;
rp = &r[sa+1];
*rp = 0;
r[0] = 0; /* this streamlines the last evaluation of aux */
btopinv = b[sb]*NTL_FRADIX + b[sb-1];
if (sb > 2)
btopinv = NTL_FRADIX / (btopinv*NTL_FRADIX + b[sb-2]);
else
btopinv = 1.0 / btopinv;
aux = btopinv*(rp[-1]*NTL_FRADIX + rp[-2]);
if (aux >= NTL_FRADIX)
aux = NTL_FRADIX-1;
for (i = sq; i >= 1; i--, rp--) {
q1 = (long) aux;
if (q1) {
zsubmul(q1, &r[i], b);
}
while (rp[0] < 0) {
zaddmulone(&r[i], b);
}
while (rp[0] > 0) {
zsubmulone(&r[i], b);
}
aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]);
while (aux > NTL_FRADIX - 16) {
/* q1 might be too small */
if (aux >= NTL_FRADIX)
aux = NTL_FRADIX-1;
zsubmulone(&r[i], b);
if (rp[0] < 0) {
/* oops...false alarm! */
zaddmulone(&r[i], b);
break;
}
else {
aux = btopinv*((rp[-1]*NTL_FRADIX + rp[-2])*NTL_FRADIX + rp[-3]);
}
}
}
i = sb;
while (i > 1 && r[i] == 0) i--;
r[0] = i;
}
void
_ntl_zaddmod(
_ntl_verylong a,
_ntl_verylong b,
_ntl_verylong n,
_ntl_verylong *c
)
{
if (*c != n) {
_ntl_zadd(a, b, c);
if (_ntl_zcompare(*c, n) >= 0)
_ntl_zsubpos(*c, n, c);
}
else {
static _ntl_verylong mem = 0;
_ntl_zadd(a, b, &mem);
if (_ntl_zcompare(mem, n) >= 0)
_ntl_zsubpos(mem, n, c);
else
_ntl_zcopy(mem, c);
}
}
void
_ntl_zsubmod(
_ntl_verylong a,
_ntl_verylong b,
_ntl_verylong n,
_ntl_verylong *c
)
{
static _ntl_verylong mem = 0;
long cmp;
if ((cmp=_ntl_zcompare(a, b)) < 0) {
_ntl_zadd(n, a, &mem);
_ntl_zsubpos(mem, b, c);
} else if (!cmp)
_ntl_zzero(c);
else
_ntl_zsubpos(a, b, c);
}
void
_ntl_zsmulmod(
_ntl_verylong a,
long d,
_ntl_verylong n,
_ntl_verylong *c
)
{
static _ntl_verylong mem = 0;
_ntl_zsmul(a, d, &mem);
_ntl_zquickmod(&mem, n);
_ntl_zcopy(mem, c);
}
void
_ntl_zmulmod(
_ntl_verylong a,
_ntl_verylong b,
_ntl_verylong n,
_ntl_verylong *c
)
{
static _ntl_verylong mem = 0;
_ntl_zmul(a, b, &mem);
_ntl_zquickmod(&mem, n);
_ntl_zcopy(mem, c);
}
void
_ntl_zsqmod(
_ntl_verylong a,
_ntl_verylong n,
_ntl_verylong *c
)
{
static _ntl_verylong mem = 0;
_ntl_zsq(a, &mem);
_ntl_zquickmod(&mem, n);
_ntl_zcopy(mem, c);
}
void
_ntl_zinvmod(
_ntl_verylong a,
_ntl_verylong n,
_ntl_verylong *c
)
{
if (_ntl_zinv(a, n, c))
zhalt("undefined inverse in _ntl_zinvmod");
}
static long
zxxeucl(
_ntl_verylong ain,
_ntl_verylong nin,
_ntl_verylong *invv,
_ntl_verylong *uu
)
{
static _ntl_verylong a = 0;
static _ntl_verylong n = 0;
static _ntl_verylong q = 0;
static _ntl_verylong w = 0;
static _ntl_verylong x = 0;
static _ntl_verylong y = 0;
static _ntl_verylong z = 0;
_ntl_verylong inv = *invv;
_ntl_verylong u = *uu;
long diff;
long ilo;
long sa;
long sn;
long temp;
long e;
long fast;
long parity;
long gotthem;
_ntl_verylong pin;
_ntl_verylong p;
long i;
long try11;
long try12;
long try21;
long try22;
long got11;
long got12;
long got21;
long got22;
double hi;
double lo;
double dt;
double fhi, fhi1;
double flo, flo1;
double num;
double den;
double dirt;
_ntl_zsetlength(&a, (e = (ain[0] > nin[0] ? ain[0] : nin[0])));
_ntl_zsetlength(&n, e);
_ntl_zsetlength(&q, e);
_ntl_zsetlength(&w, e);
_ntl_zsetlength(&x, e);
_ntl_zsetlength(&y, e);
_ntl_zsetlength(&z, e);
_ntl_zsetlength(&inv, e);
*invv = inv;
_ntl_zsetlength(&u, e);
*uu = u;
fhi1 = 1.0 + ((double) 32.0)/NTL_FDOUBLE_PRECISION;
flo1 = 1.0 - ((double) 32.0)/NTL_FDOUBLE_PRECISION;
fhi = 1.0 + ((double) 8.0)/NTL_FDOUBLE_PRECISION;
flo = 1.0 - ((double) 8.0)/NTL_FDOUBLE_PRECISION;
pin = &ain[0];
p = &a[0];
for (i = (*pin); i >= 0; i--)
*p++ = *pin++;
pin = &nin[0];
p = &n[0];
for (i = (*pin); i >= 0; i--)
*p++ = *pin++;
inv[0] = 1;
inv[1] = 1;
w[0] = 1;
w[1] = 0;
while (n[0] > 1 || n[1] > 0)
{
gotthem = 0;
sa = a[0];
sn = n[0];
diff = sa - sn;
if (!diff || diff == 1)
{
sa = a[0];
p = &a[sa];
num = ((double) (*p)) * NTL_FRADIX;
if (sa > 1)
num += (*(--p));
num *= NTL_FRADIX;
if (sa > 2)
num += (*(p - 1));
sn = n[0];
p = &n[sn];
den = (double) (*p) * NTL_FRADIX;
if (sn > 1)
den += (*(--p));
den *= NTL_FRADIX;
if (sn > 2)
den += (*(p - 1));
hi = fhi1 * (num + 1.0) / den;
lo = flo1 * num / (den + 1.0);
if (diff > 0)
{
hi *= NTL_FRADIX;
lo *= NTL_FRADIX;
}
try11 = 1;
try12 = 0;
try21 = 0;
try22 = 1;
parity = 1;
fast = 1;
while (fast > 0)
{
parity = 1 - parity;
if (hi >= NTL_FRADIX)
fast = 0;
else
{
ilo = (long)lo;
dirt = hi - ilo;
if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi)
fast = 0;
else
{
dt = lo-ilo;
lo = flo / dirt;
if (dt > 1.0/NTL_FDOUBLE_PRECISION)
hi = fhi / dt;
else
hi = NTL_FRADIX;
temp = try11;
try11 = try21;
if ((NTL_RADIX - temp) / ilo < try21)
fast = 0;
else
try21 = temp + ilo * try21;
temp = try12;
try12 = try22;
if ((NTL_RADIX - temp) / ilo < try22)
fast = 0;
else
try22 = temp + ilo * try22;
if ((fast > 0) && (parity > 0))
{
gotthem = 1;
got11 = try11;
got12 = try12;
got21 = try21;
got22 = try22;
}
}
}
}
}
if (gotthem)
{
_ntl_zsmul(inv, got11, &x);
_ntl_zsmul(w, got12, &y);
_ntl_zsmul(inv, got21, &z);
_ntl_zsmul(w, got22, &w);
_ntl_zadd(x, y, &inv);
_ntl_zadd(z, w, &w);
_ntl_zsmul(a, got11, &x);
_ntl_zsmul(n, got12, &y);
_ntl_zsmul(a, got21, &z);
_ntl_zsmul(n, got22, &n);
_ntl_zsub(x, y, &a);
_ntl_zsub(n, z, &n);
}
else
{
_ntl_zdiv(a, n, &q, &a);
_ntl_zmul(q, w, &x);
_ntl_zadd(inv, x, &inv);
if (a[0] > 1 || a[1] > 0)
{
_ntl_zdiv(n, a, &q, &n);
_ntl_zmul(q, inv, &x);
_ntl_zadd(w, x, &w);
}
else
{
p = &a[0];
pin = &n[0];
for (i = (*pin); i >= 0; i--)
*p++ = *pin++;
n[0] = 1;
n[1] = 0;
_ntl_zcopy(w, &inv);
_ntl_znegate(&inv);
}
}
}
if ((a[0] == 1) && (a[1] == 1))
e = 0;
else
e = 1;
p = &u[0];
pin = &a[0];
for (i = (*pin); i >= 0; i--)
*p++ = *pin++;
*invv = inv;
*uu = u;
return (e);
}
long
_ntl_zinv(
_ntl_verylong ain,
_ntl_verylong nin,
_ntl_verylong *invv
)
{
static _ntl_verylong u = 0;
static _ntl_verylong v = 0;
long sgn;
if (_ntl_zscompare(nin, 1) <= 0) {
zhalt("InvMod: second input <= 1");
}
sgn = _ntl_zsign(ain);
if (sgn < 0) {
zhalt("InvMod: first input negative");
}
if (_ntl_zcompare(ain, nin) >= 0) {
zhalt("InvMod: first input too big");
}
if (sgn == 0) {
_ntl_zcopy(nin, invv);
return 1;
}
#ifdef NTL_GMP_HACK
#if (__GNU_MP_VERSION >= 3)
/* versions of gmp prior to version 3 have really bad xgcd */
if (_ntl_gmp_hack && nin[0] >= NTL_GMP_INVMOD_CROSS) {
GT_INIT
lip_to_mpz(ain, gt_1);
lip_to_mpz(nin, gt_2);
mpz_gcdext(gt_3, gt_4, 0, gt_1, gt_2);
mpz_to_lip(&u, gt_3);
mpz_to_lip(&v, gt_4);
if (u && u[0] == 1 && u[1] == 1) {
/*
* We make sure v is in range 0..n-1,
* just in case GMP is sloppy.
*/
while (_ntl_zsign(v) < 0)
_ntl_zadd(v, nin, &v);
while (_ntl_zcompare(v, nin) >= 0)
_ntl_zsub(v, nin, &v);
_ntl_zcopy(v, invv);
return 0;
}
else {
_ntl_zcopy(u, invv);
return 1;
}
}
#endif
#endif
if (!(zxxeucl(ain, nin, &v, &u))) {
if (_ntl_zsign(v) < 0) _ntl_zadd(v, nin, &v);
_ntl_zcopy(v, invv);
return 0;
}
_ntl_zcopy(u, invv);
return 1;
}
void
_ntl_zexteucl(
_ntl_verylong aa,
_ntl_verylong *xa,
_ntl_verylong bb,
_ntl_verylong *xb,
_ntl_verylong *d
)
{
static _ntl_verylong modcon = 0;
static _ntl_verylong a=0, b=0;
long anegative = 0;
long bnegative = 0;
_ntl_zcopy(aa, &a);
_ntl_zcopy(bb, &b);
if (anegative = (a[0] < 0))
a[0] = -a[0];
if (bnegative = (b[0] < 0))
b[0] = -b[0];
if (!b[1] && (b[0] == 1))
{
_ntl_zone(xa);
_ntl_zzero(xb);
_ntl_zcopy(a, d);
goto done;
}
if (!a[1] && (a[0] == 1))
{
_ntl_zzero(xa);
_ntl_zone(xb);
_ntl_zcopy(b, d);
goto done;
}
#ifdef NTL_GMP_HACK
#if (__GNU_MP_VERSION >= 3)
/* versions of gmp prior to version 3 have really bad xgcd */
if (_ntl_gmp_hack &&
a[0] >= NTL_GMP_XGCD_CROSS && b[0] >= NTL_GMP_XGCD_CROSS) {
GT_INIT
lip_to_mpz(aa, gt_1);
lip_to_mpz(bb, gt_2);
mpz_gcdext(gt_3, gt_4, gt_5, gt_1, gt_2);
mpz_to_lip(&a, gt_3);
mpz_to_lip(&b, gt_4);
mpz_to_lip(&modcon, gt_5);
_ntl_zcopy(a, d);
_ntl_zcopy(b, xa);
_ntl_zcopy(modcon, xb);
return;
}
#endif
#endif
zxxeucl(a, b, xa, d);
_ntl_zmul(a, *xa, xb);
_ntl_zsub(*d, *xb, xb);
_ntl_zdiv(*xb, b, xb, &modcon);
if ((modcon[1]) || (modcon[0] != 1))
{
zhalt("non-zero remainder in _ntl_zexteucl BUG");
}
done:
if (anegative)
{
_ntl_znegate(xa);
}
if (bnegative)
{
_ntl_znegate(xb);
}
}
/* I've adapted LIP's extended euclidean algorithm to
* do rational reconstruction. -- VJS.
*/
long
_ntl_zxxratrecon(
_ntl_verylong ain,
_ntl_verylong nin,
_ntl_verylong num_bound,
_ntl_verylong den_bound,
_ntl_verylong *num_out,
_ntl_verylong *den_out
)
{
static _ntl_verylong a = 0;
static _ntl_verylong n = 0;
static _ntl_verylong q = 0;
static _ntl_verylong w = 0;
static _ntl_verylong x = 0;
static _ntl_verylong y = 0;
static _ntl_verylong z = 0;
static _ntl_verylong inv = 0;
static _ntl_verylong u = 0;
static _ntl_verylong a_bak = 0;
static _ntl_verylong n_bak = 0;
static _ntl_verylong inv_bak = 0;
static _ntl_verylong w_bak = 0;
_ntl_verylong p;
long diff;
long ilo;
long sa;
long sn;
long snum;
long sden;
long e;
long fast;
long temp;
long parity;
long gotthem;
long try11;
long try12;
long try21;
long try22;
long got11;
long got12;
long got21;
long got22;
double hi;
double lo;
double dt;
double fhi, fhi1;
double flo, flo1;
double num;
double den;
double dirt;
if (_ntl_zsign(num_bound) < 0)
zhalt("rational reconstruction: bad numerator bound");
if (!num_bound)
snum = 1;
else
snum = num_bound[0];
if (_ntl_zsign(den_bound) <= 0)
zhalt("rational reconstruction: bad denominator bound");
sden = den_bound[0];
if (_ntl_zsign(nin) <= 0)
zhalt("rational reconstruction: bad modulus");
if (_ntl_zsign(ain) < 0 || _ntl_zcompare(ain, nin) >= 0)
zhalt("rational reconstruction: bad residue");
e = nin[0];
_ntl_zsetlength(&a, e);
_ntl_zsetlength(&n, e);
_ntl_zsetlength(&q, e);
_ntl_zsetlength(&w, e);
_ntl_zsetlength(&x, e);
_ntl_zsetlength(&y, e);
_ntl_zsetlength(&z, e);
_ntl_zsetlength(&inv, e);
_ntl_zsetlength(&u, e);
_ntl_zsetlength(&a_bak, e);
_ntl_zsetlength(&n_bak, e);
_ntl_zsetlength(&inv_bak, e);
_ntl_zsetlength(&w_bak, e);
fhi1 = 1.0 + ((double) 32.0)/NTL_FDOUBLE_PRECISION;
flo1 = 1.0 - ((double) 32.0)/NTL_FDOUBLE_PRECISION;
fhi = 1.0 + ((double) 8.0)/NTL_FDOUBLE_PRECISION;
flo = 1.0 - ((double) 8.0)/NTL_FDOUBLE_PRECISION;
_ntl_zcopy(ain, &a);
_ntl_zcopy(nin, &n);
_ntl_zone(&inv);
_ntl_zzero(&w);
while (1)
{
if (w[0] >= sden && _ntl_zcompare(w, den_bound) > 0) break;
if (n[0] <= snum && _ntl_zcompare(n, num_bound) <= 0) break;
_ntl_zcopy(a, &a_bak);
_ntl_zcopy(n, &n_bak);
_ntl_zcopy(w, &w_bak);
_ntl_zcopy(inv, &inv_bak);
gotthem = 0;
sa = a[0];
sn = n[0];
diff = sa - sn;
if (!diff || diff == 1)
{
sa = a[0];
p = &a[sa];
num = (double) (*p) * NTL_FRADIX;
if (sa > 1)
num += (*(--p));
num *= NTL_FRADIX;
if (sa > 2)
num += (*(p - 1));
sn = n[0];
p = &n[sn];
den = (double) (*p) * NTL_FRADIX;
if (sn > 1)
den += (*(--p));
den *= NTL_FRADIX;
if (sn > 2)
den += (*(p - 1));
hi = fhi1 * (num + 1.0) / den;
lo = flo1 * num / (den + 1.0);
if (diff > 0)
{
hi *= NTL_FRADIX;
lo *= NTL_FRADIX;
}
try11 = 1;
try12 = 0;
try21 = 0;
try22 = 1;
parity = 1;
fast = 1;
while (fast > 0)
{
parity = 1 - parity;
if (hi >= NTL_FRADIX)
fast = 0;
else
{
ilo = (long)lo;
dirt = hi - ilo;
if (dirt < 1.0/NTL_FDOUBLE_PRECISION || !ilo || ilo < (long)hi)
fast = 0;
else
{
dt = lo-ilo;
lo = flo / dirt;
if (dt > 1.0/NTL_FDOUBLE_PRECISION)
hi = fhi / dt;
else
hi = NTL_FRADIX;
temp = try11;
try11 = try21;
if ((NTL_RADIX - temp) / ilo < try21)
fast = 0;
else
try21 = temp + ilo * try21;
temp = try12;
try12 = try22;
if ((NTL_RADIX - temp) / ilo < try22)
fast = 0;
else
try22 = temp + ilo * try22;
if ((fast > 0) && (parity > 0))
{
gotthem = 1;
got11 = try11;
got12 = try12;
got21 = try21;
got22 = try22;
}
}
}
}
}
if (gotthem)
{
_ntl_zsmul(inv, got11, &x);
_ntl_zsmul(w, got12, &y);
_ntl_zsmul(inv, got21, &z);
_ntl_zsmul(w, got22, &w);
_ntl_zadd(x, y, &inv);
_ntl_zadd(z, w, &w);
_ntl_zsmul(a, got11, &x);
_ntl_zsmul(n, got12, &y);
_ntl_zsmul(a, got21, &z);
_ntl_zsmul(n, got22, &n);
_ntl_zsub(x, y, &a);
_ntl_zsub(n, z, &n);
}
else
{
_ntl_zdiv(a, n, &q, &a);
_ntl_zmul(q, w, &x);
_ntl_zadd(inv, x, &inv);
if (a[0] > 1 || a[1] > 0)
{
_ntl_zdiv(n, a, &q, &n);
_ntl_zmul(q, inv, &x);
_ntl_zadd(w, x, &w);
}
else
{
break;
}
}
}
_ntl_zcopy(a_bak, &a);
_ntl_zcopy(n_bak, &n);
_ntl_zcopy(w_bak, &w);
_ntl_zcopy(inv_bak, &inv);
_ntl_znegate(&w);
while (1)
{
sa = w[0];
if (sa < 0) w[0] = -sa;
if (w[0] >= sden && _ntl_zcompare(w, den_bound) > 0) return 0;
w[0] = sa;
if (n[0] <= snum && _ntl_zcompare(n, num_bound) <= 0) break;
fast = 0;
sa = a[0];
sn = n[0];
diff = sa - sn;
if (!diff || diff == 1)
{
sa = a[0];
p = &a[sa];
num = (double) (*p) * NTL_FRADIX;
if (sa > 1)
num += (*(--p));
num *= NTL_FRADIX;
if (sa > 2)
num += (*(p - 1));
sn = n[0];
p = &n[sn];
den = (double) (*p) * NTL_FRADIX;
if (sn > 1)
den += (*(--p));
den *= NTL_FRADIX;
if (sn > 2)
den += (*(p - 1));
hi = fhi1 * (num + 1.0) / den;
lo = flo1 * num / (den + 1.0);
if (diff > 0)
{
hi *= NTL_FRADIX;
lo *= NTL_FRADIX;
}
if (hi < NTL_FRADIX)
{
ilo = (long)lo;
if (ilo == (long)hi)
fast = 1;
}
}
if (fast)
{
if (ilo != 0) {
if (ilo == 1) {
_ntl_zsub(inv, w, &inv);
_ntl_zsubpos(a, n, &a);
}
else if (ilo == 2) {
_ntl_z2mul(w, &x);
_ntl_zsub(inv, x, &inv);
_ntl_z2mul(n, &x);
_ntl_zsubpos(a, x, &a);
}
else if (ilo ==3) {
_ntl_z2mul(w, &x);
_ntl_zadd(w, x, &x);
_ntl_zsub(inv, x, &inv);
_ntl_z2mul(n, &x);
_ntl_zadd(n, x, &x);
_ntl_zsubpos(a, x, &a);
}
else if (ilo == 4) {
_ntl_zlshift(w, 2, &x);
_ntl_zsub(inv, x, &inv);
_ntl_zlshift(n, 2, &x);
_ntl_zsubpos(a, x, &a);
}
else {
_ntl_zsmul(w, ilo, &x);
_ntl_zsub(inv, x, &inv);
_ntl_zsmul(n, ilo, &x);
_ntl_zsubpos(a, x, &a);
}
}
}
else {
_ntl_zdiv(a, n, &q, &a);
_ntl_zmul(q, w, &x);
_ntl_zsub(inv, x, &inv);
}
_ntl_zswap(&a, &n);
_ntl_zswap(&inv, &w);
}
if (_ntl_zsign(w) < 0) {
_ntl_znegate(&w);
_ntl_znegate(&n);
}
_ntl_zcopy(n, num_out);
_ntl_zcopy(w, den_out);
return 1;
}
static
long OptWinSize(long n)
/* finds k that minimizes n/(k+1) + 2^{k-1} */
{
long k;
double v, v_new;
v = n/2.0 + 1.0;
k = 1;
for (;;) {
v_new = n/((double)(k+2)) + ((double)(1L << k));
if (v_new >= v) break;
v = v_new;
k++;
}
return k;
}
static
void _ntl_zsppowermod(long a, _ntl_verylong e, _ntl_verylong n,
_ntl_verylong *x)
{
_ntl_verylong res;
long i, k;
if (_ntl_ziszero(e)) {
_ntl_zone(x);
return;
}
res = 0;
_ntl_zsetlength(&res, n[0]);
_ntl_zone(&res);
k = _ntl_z2log(e);
for (i = k - 1; i >= 0; i--) {
_ntl_zsqmod(res, n, &res);
if (_ntl_zbit(e, i))
_ntl_zsmulmod(res, a, n, &res);
}
if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, n, &res);
_ntl_zcopy(res, x);
_ntl_zfree(&res);
}
static
void _ntl_ztwopowermod( _ntl_verylong e, _ntl_verylong n,
_ntl_verylong *x)
{
_ntl_verylong res;
long i, k;
if (_ntl_ziszero(e)) {
_ntl_zone(x);
return;
}
res = 0;
_ntl_zsetlength(&res, n[0]);
_ntl_zone(&res);
k = _ntl_z2log(e);
for (i = k - 1; i >= 0; i--) {
_ntl_zsqmod(res, n, &res);
if (_ntl_zbit(e, i))
_ntl_zaddmod(res, res, n, &res);
}
if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, n, &res);
_ntl_zcopy(res, x);
_ntl_zfree(&res);
}
void _ntl_zpowermod(_ntl_verylong g, _ntl_verylong e, _ntl_verylong F,
_ntl_verylong *h)
/* h = g^e mod f using "sliding window" algorithm
remark: the notation (h, g, e, F) is strange, because I
copied the code from BB.c.
*/
{
_ntl_verylong res, *v, t;
long n, i, k, val, cnt, m;
if (_ntl_zsign(g) < 0 || _ntl_zcompare(g, F) >= 0 ||
_ntl_zscompare(F, 1) <= 0)
zhalt("PowerMod: bad args");
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack) {
res = 0;
_ntl_gmp_powermod(&res, g, e, F);
/* careful...don't compute directly into h, as it could lead
* to an allocation error in some cases.
*/
_ntl_zcopy(res, h);
_ntl_zfree(&res);
return;
}
#endif
if (!g || g[0] == 1 || g[0] == -1) {
long gg = _ntl_ztoint(g);
if (gg == 2)
_ntl_ztwopowermod(e, F, h);
else
_ntl_zsppowermod(gg, e, F, h);
return;
}
if (_ntl_zscompare(e, 0) == 0) {
_ntl_zone(h);
return;
}
if (_ntl_zscompare(e, 1) == 0) {
_ntl_zcopy(g, h);
return;
}
if (_ntl_zscompare(e, -1) == 0) {
_ntl_zinvmod(g, F, h);
return;
}
if (_ntl_zscompare(e, 2) == 0) {
_ntl_zsqmod(g, F, h);
return;
}
if (_ntl_zscompare(e, -2) == 0) {
res = 0;
_ntl_zsqmod(g, F, &res);
_ntl_zinvmod(res, F, h);
_ntl_zfree(&res);
return;
}
n = _ntl_z2log(e);
res = 0;
_ntl_zone(&res);
if (n < 16) {
/* plain square-and-multiply algorithm */
for (i = n - 1; i >= 0; i--) {
_ntl_zsqmod(res, F, &res);
if (_ntl_zbit(e, i))
_ntl_zmulmod(res, g, F, &res);
}
if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, F, &res);
_ntl_zcopy(res, h);
_ntl_zfree(&res);
return;
}
k = OptWinSize(n);
if (k > 5) k = 5;
v = (_ntl_verylong *) NTL_MALLOC(1L << (k-1), sizeof(_ntl_verylong), 0);
if (!v) zhalt("out of memory");
for (i = 0; i < (1L << (k-1)); i++)
v[i] = 0;
_ntl_zcopy(g, &v[0]);
if (k > 1) {
t = 0;
_ntl_zsqmod(g, F, &t);
for (i = 1; i < (1L << (k-1)); i++)
_ntl_zmulmod(v[i-1], t, F, &v[i]);
_ntl_zfree(&t);
}
val = 0;
for (i = n-1; i >= 0; i--) {
val = (val << 1) | _ntl_zbit(e, i);
if (val == 0)
_ntl_zsqmod(res, F, &res);
else if (val >= (1L << (k-1)) || i == 0) {
cnt = 0;
while ((val & 1) == 0) {
val = val >> 1;
cnt++;
}
m = val;
while (m > 0) {
_ntl_zsqmod(res, F, &res);
m = m >> 1;
}
_ntl_zmulmod(res, v[val >> 1], F, &res);
while (cnt > 0) {
_ntl_zsqmod(res, F, &res);
cnt--;
}
val = 0;
}
}
if (_ntl_zsign(e) < 0) _ntl_zinvmod(res, F, &res);
_ntl_zcopy(res, h);
_ntl_zfree(&res);
for (i = 0; i < (1L << (k-1)); i++)
_ntl_zfree(&v[i]);
free(v);
}
void
_ntl_zexp(
_ntl_verylong a,
long e,
_ntl_verylong *bb
)
{
long k;
long len_a;
long sa;
static _ntl_verylong res = 0;
if (!a)
sa = 0;
else {
sa = a[0];
if (sa < 0) sa = -sa;
}
if (sa <= 1) {
_ntl_zexps(_ntl_ztoint(a), e, bb);
return;
}
if (!e)
{
_ntl_zone(bb);
return;
}
if (e < 0)
zhalt("negative exponent in _ntl_zexp");
if (_ntl_ziszero(a))
{
_ntl_zzero(bb);
return;
}
len_a = _ntl_z2log(a);
if (len_a > (NTL_MAX_LONG-(NTL_NBITS-1))/e)
zhalt("overflow in _ntl_zexp");
_ntl_zsetlength(&res, (len_a*e+NTL_NBITS-1)/NTL_NBITS);
_ntl_zcopy(a, &res);
k = 1;
while ((k << 1) <= e)
k <<= 1;
while (k >>= 1) {
_ntl_zsq(res, &res);
if (e & k)
_ntl_zmul(a, res, &res);
}
_ntl_zcopy(res, bb);
}
void
_ntl_zexps(
long a,
long e,
_ntl_verylong *bb
)
{
long k;
long len_a;
static _ntl_verylong res = 0;
if (!e)
{
_ntl_zone(bb);
return;
}
if (e < 0)
zhalt("negative exponent in _ntl_zexps");
if (!a)
{
_ntl_zzero(bb);
return;
}
if (a >= NTL_RADIX || a <= -NTL_RADIX) {
_ntl_zintoz(a, &res);
_ntl_zexp(res, e, &res);
return;
}
len_a = _ntl_z2logs(a);
if (len_a > (NTL_MAX_LONG-(NTL_NBITS-1))/e)
zhalt("overflow in _ntl_zexps");
_ntl_zsetlength(&res, (len_a*e+NTL_NBITS-1)/NTL_NBITS);
_ntl_zintoz(a, &res);
k = 1;
while ((k << 1) <= e)
k <<= 1;
while (k >>= 1) {
_ntl_zsq(res, &res);
if (e & k)
_ntl_zsmul(res, a, &res);
}
_ntl_zcopy(res, bb);
}
void
_ntl_z2mul(
_ntl_verylong n,
_ntl_verylong *rres
)
{
long sn;
long i;
long n_alias;
long carry;
_ntl_verylong res;
if (!n)
{
_ntl_zzero(rres);
return;
}
if ((!n[1]) && (n[0] == 1))
{
_ntl_zzero(rres);
return;
}
if ((sn = n[0]) < 0)
sn = -sn;
res = *rres;
n_alias = (n == res);
_ntl_zsetlength(&res, sn + 1);
if (n_alias) n = res;
*rres = res;
carry = 0;
for (i = 1; i <= sn; i++)
{
if ((res[i] = (n[i] << 1) + carry) >= NTL_RADIX)
{
res[i] -= NTL_RADIX;
carry = 1;
}
else
carry = 0;
}
if (carry)
res[++sn] = 1;
if (n[0] < 0)
res[0] = -sn;
else
res[0] = sn;
}
long
_ntl_z2div(
_ntl_verylong n,
_ntl_verylong *rres
)
{
long sn;
long i;
long result;
_ntl_verylong res = *rres;
if ((!n) || ((!n[1]) && (n[0] == 1)))
{
_ntl_zzero(rres);
return (0);
}
if ((sn = n[0]) < 0)
sn = -sn;
/* n won't move if res aliases n */
_ntl_zsetlength(&res, sn);
*rres = res;
result = n[1] & 1;
for (i = 1; i < sn; i++)
{
res[i] = (n[i] >> 1);
if (n[i + 1] & 1)
res[i] += (NTL_RADIX >> 1);
}
if (res[sn] = (n[sn] >> 1))
res[0] = n[0];
else if (sn == 1)
{
res[0] = 1;
}
else if (n[0] < 0)
res[0] = -sn + 1;
else
res[0] = sn - 1;
return (result);
}
void
_ntl_zlshift(
_ntl_verylong n,
long k,
_ntl_verylong *rres
)
{
long big;
long small;
long sn;
long i;
long cosmall;
long n_alias;
_ntl_verylong res;
if (!n)
{
_ntl_zzero(rres);
return;
}
if ((!n[1]) && (n[0] == 1))
{
_ntl_zzero(rres);
return;
}
res = *rres;
n_alias = (n == res);
if (!k)
{
if (!n_alias)
_ntl_zcopy(n, rres);
return;
}
if (k < 0)
{
if (k < -NTL_MAX_LONG)
_ntl_zzero(rres);
else
_ntl_zrshift(n, -k, rres);
return;
}
if (k == 1)
{
_ntl_z2mul(n, rres);
return;
}
if ((sn = n[0]) < 0)
sn = -sn;
i = sn + (big = k / NTL_NBITS);
if (small = k - big * NTL_NBITS)
{
_ntl_zsetlength(&res, i + 1);
if (n_alias) n = res;
*rres = res;
res[i + 1] = n[sn] >> (cosmall = NTL_NBITS - small);
for (i = sn; i > 1; i--)
res[i + big] = ((((unsigned long) n[i]) << small) & NTL_RADIXM) + (n[i - 1] >> cosmall);
res[big + 1] = (((unsigned long) n[1]) << small) & NTL_RADIXM;
for (i = big; i; i--)
res[i] = 0;
if (res[sn + big + 1])
big++;
}
else
{
_ntl_zsetlength(&res, i);
if (n_alias) n = res;
*rres = res;
for (i = sn; i; i--)
res[i + big] = n[i];
for (i = big; i; i--)
res[i] = 0;
}
if (n[0] > 0)
res[0] = n[0] + big;
else
res[0] = n[0] - big;
}
void
_ntl_zrshift(
_ntl_verylong n,
long k,
_ntl_verylong *rres
)
{
long big;
long small;
long sn;
long i;
long cosmall;
_ntl_verylong res;
if (!n)
{
_ntl_zzero(rres);
return;
}
if ((!n[1]) && (n[0] == 1))
{
_ntl_zzero(rres);
return;
}
res = *rres;
if (!k)
{
if (n != res)
_ntl_zcopy(n, rres);
return;
}
if (k < 0)
{
if (k < -NTL_MAX_LONG) zhalt("overflow in _ntl_zrshift");
_ntl_zlshift(n, -k, rres);
return;
}
if (k == 1)
{
_ntl_z2div(n, rres);
return;
}
big = k / NTL_NBITS;
small = k - big * NTL_NBITS;
if ((sn = n[0]) < 0)
sn = -sn;
if ((big >= sn) ||
((big == sn - 1) && small && (!(n[sn] >> small))))
/* The microsoft optimizer generates bad code without
the above test for small != 0 */
{
_ntl_zzero(rres);
return;
}
sn -= big;
/* n won't move if res aliases n */
_ntl_zsetlength(&res, sn);
*rres = res;
if (small)
{
cosmall = NTL_NBITS - small;
for (i = 1; i < sn; i++)
res[i] = (n[i + big] >> small) +
((((unsigned long) n[i + big + 1]) << cosmall) & NTL_RADIXM);
if (!(res[sn] = (n[sn + big] >> small)))
sn--;
}
else
for (i = 1; i <= sn; i++)
res[i] = n[i + big];
if (n[0] > 0)
res[0] = sn;
else
res[0] = -sn;
}
long
_ntl_zmakeodd(
_ntl_verylong *nn
)
{
_ntl_verylong n = *nn;
long i;
long shift = 1;
if (!n || (!n[1] && (n[0] == 1)))
return (0);
while (!(n[shift]))
shift++;
i = n[shift];
shift = NTL_NBITS * (shift - 1);
while (!(i & 1))
{
shift++;
i >>= 1;
}
_ntl_zrshift(n, shift, &n);
return (shift);
}
long
_ntl_znumtwos(
_ntl_verylong n
)
{
long i;
long shift = 1;
if (!n || (!n[1] && (n[0] == 1)))
return (0);
while (!(n[shift]))
shift++;
i = n[shift];
shift = NTL_NBITS * (shift - 1);
while (!(i & 1))
{
shift++;
i >>= 1;
}
return (shift);
}
long
_ntl_zsqrts(
long n
)
{
long a;
long ndiva;
long newa;
static _ntl_verylong ln=0;
static _ntl_verylong rr=0;
if (n < 0)
zhalt("_ntl_zsqrts: negative argument");
if (n <= 0)
return (0);
if (n <= 3)
return (1);
if (n <= 8)
return (2);
if (n >= NTL_RADIX)
{
_ntl_zintoz(n,&ln);
_ntl_zsqrt(ln,&rr);
return(_ntl_ztoint(rr));
}
newa = 3L << (2 * (NTL_NBITSH - 1));
a = 1L << NTL_NBITSH;
while (!(n & newa))
{
newa >>= 2;
a >>= 1;
}
while (1)
{
newa = ((ndiva = n / a) + a) / 2;
if (newa - ndiva <= 1)
{
if (newa * newa <= n)
return (newa);
else
return (ndiva);
}
a = newa;
}
}
void _ntl_zsqrt(_ntl_verylong n, _ntl_verylong *rr)
{
static _ntl_verylong a = 0;
static _ntl_verylong ndiva = 0;
static _ntl_verylong diff = 0;
static _ntl_verylong r = 0;
long i;
if (!n) {
_ntl_zzero(rr);
return;
}
if ((i = n[0]) < 0)
zhalt("negative argument in _ntl_zsqrt");
if (i == 1) {
_ntl_zintoz(_ntl_zsqrts(n[1]), rr);
return;
}
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && i >= NTL_GMP_SQRT_CROSS) {
GT_INIT
lip_to_mpz(n, gt_1);
mpz_sqrt(gt_2, gt_1);
mpz_to_lip(&r, gt_2);
_ntl_zcopy(r, rr);
return;
}
#endif
_ntl_zsetlength(&a, i);
_ntl_zsetlength(&ndiva, i);
_ntl_zsetlength(&diff, i);
a[(a[0] = (i + 1) / 2)] = _ntl_zsqrts(n[i]) + 1;
if (!(i & 1))
a[a[0]] <<= NTL_NBITSH;
if (a[a[0]] & NTL_RADIX) {
a[a[0]] = 0;
a[0]++;
a[a[0]] = 1;
}
for (i = a[0] - 1; i; i--)
a[i] = 0;
while (1) {
_ntl_zdiv(n, a, &ndiva, &r);
_ntl_zadd(a, ndiva, &r);
_ntl_zrshift(r, 1, &r);
if (_ntl_zcompare(r, ndiva) <= 0)
goto done;
_ntl_zsubpos(r, ndiva, &diff);
if ((diff[0] == 1) && (diff[1] <= 1)) {
_ntl_zsq(r, &diff);
if (_ntl_zcompare(diff, n) > 0)
_ntl_zcopy(ndiva, &r);
goto done;
}
_ntl_zcopy(r, &a);
}
done:
_ntl_zcopy(r, rr);
}
void
_ntl_zgcd(
_ntl_verylong mm1,
_ntl_verylong mm2,
_ntl_verylong *rres
)
{
long agrb;
long shibl;
static _ntl_verylong aa = 0;
static _ntl_verylong bb = 0;
static _ntl_verylong cc = 0;
_ntl_verylong a;
_ntl_verylong b;
_ntl_verylong c;
_ntl_verylong d;
long m1negative;
long m2negative;
/* _ntl_ziszero is necessary here and below to fix an
an aliasing bug in LIP */
if (_ntl_ziszero(mm1))
{
if (mm2 != *rres)
_ntl_zcopy(mm2,rres);
_ntl_zabs(rres);
return;
}
if (_ntl_ziszero(mm2))
{
if (mm1 != *rres)
_ntl_zcopy(mm1,rres);
_ntl_zabs(rres);
return;
}
if (mm1 == mm2)
{
if (mm1 != *rres)
_ntl_zcopy(mm1, rres);
_ntl_zabs(rres);
return;
}
#ifdef NTL_GMP_HACK
{
long s1 = mm1[0];
long s2 = mm2[0];
if (s1 < 0) s1 = -s1;
if (s2 < 0) s2 = -s2;
if (_ntl_gmp_hack && s1 >= NTL_GMP_GCD_CROSS && s2 >= NTL_GMP_GCD_CROSS) {
GT_INIT
lip_to_mpz(mm1, gt_1);
lip_to_mpz(mm2, gt_2);
mpz_gcd(gt_3, gt_1, gt_2);
mpz_to_lip(&aa, gt_3);
_ntl_zcopy(aa, rres);
return;
}
}
#endif
if (m1negative = (mm1[0] < 0))
mm1[0] = -mm1[0];
if (m2negative = (mm2[0] < 0))
mm2[0] = -mm2[0];
if ((agrb = mm1[0]) < mm2[0])
agrb = mm2[0];
_ntl_zsetlength(&aa, agrb+1);
_ntl_zsetlength(&bb, agrb+1);
_ntl_zsetlength(&cc, agrb+1);
if (mm1[0] != mm2[0])
{
if (mm1[0] > mm2[0])
{
_ntl_zcopy(mm2, &aa);
_ntl_zmod(mm1, aa, &bb);
}
else
{
_ntl_zcopy(mm1, &aa);
_ntl_zmod(mm2, aa, &bb);
}
if (!(bb[1]) && (bb[0] == 1))
{
a = aa;
goto done;
}
}
else
{
_ntl_zcopy(mm1, &aa);
_ntl_zcopy(mm2, &bb);
}
if ((agrb = _ntl_zmakeodd(&aa)) < (shibl = _ntl_zmakeodd(&bb)))
shibl = agrb;
if (!(agrb = _ntl_zcompare(aa, bb)))
{
a = aa;
goto endshift;
}
else if (agrb < 0)
{
a = bb;
b = aa;
}
else
{
a = aa;
b = bb;
}
c = cc;
_ntl_zsubpos(a, b, &c);
do
{
_ntl_zmakeodd(&c);
if (!(agrb = _ntl_zcompare(b, c)))
{
a = b;
goto endshift;
}
else if (agrb > 0)
{
a = b;
b = c;
c = a;
}
else
{
d = a;
a = c;
c = d;
}
_ntl_zsubpos(a, b, &c);
} while (c[1] || c[0] != 1);
endshift:
_ntl_zlshift(a, shibl, &a);
done:
if (m1negative)
mm1[0] = -mm1[0];
if (m2negative)
mm2[0] = -mm2[0];
_ntl_zcopy(a, rres);
}
long _ntl_zsign(_ntl_verylong a)
{
if (!a)
{
return (0);
}
if (a[0] < 0)
return (-1);
if (a[0] > 1)
return (1);
if (a[1])
return (1);
return (0);
}
void _ntl_zabs(_ntl_verylong *pa)
{
_ntl_verylong a = *pa;
if (!a)
return;
if (a[0] < 0)
a[0] = (-a[0]);
}
long
_ntl_z2logs(
long aa
)
{
long i = 0;
unsigned long a;
if (aa < 0)
a = - ((unsigned long) aa);
else
a = aa;
while (a>=256)
i += 8, a >>= 8;
if (a >=16)
i += 4, a >>= 4;
if (a >= 4)
i += 2, a >>= 2;
if (a >= 2)
i += 2;
else if (a >= 1)
i++;
return (i);
}
long
_ntl_z2log(
_ntl_verylong a
)
{
long la;
if (!a)
return (0);
la = (a[0] > 0 ? a[0] : -a[0]);
return ( NTL_NBITS * (la - 1) + _ntl_z2logs(a[la]) );
}
long
_ntl_zscompare(
_ntl_verylong a,
long b
)
{
if (!b)
return _ntl_zsign(a);
else {
static _ntl_verylong c = 0;
_ntl_zintoz(b, &c);
return (_ntl_zcompare(a, c));
}
}
void
_ntl_zswap(
_ntl_verylong *a,
_ntl_verylong *b
)
{
_ntl_verylong c;
if ((*a && ((*a)[-1] & 1)) || (*b && ((*b)[-1] & 1))) {
static _ntl_verylong t = 0;
_ntl_zcopy(*a, &t);
_ntl_zcopy(*b, a);
_ntl_zcopy(t, b);
return;
}
c = *a;
*a = *b;
*b = c;
}
long
_ntl_ziszero(
_ntl_verylong a
)
{
if (!a) return (1);
if (a[1]) return (0);
if (a[0]==1) return (1);
return (0);
}
long
_ntl_zodd(
_ntl_verylong a
)
{
if (!a) return (0);
return (a[1]&1);
}
long
_ntl_zbit(
_ntl_verylong a,
long p
)
{
long bl;
long wh;
long sa;
if (p < 0 || !a) return 0;
bl = (p/NTL_NBITS);
wh = 1L << (p - NTL_NBITS*bl);
bl ++;
sa = a[0];
if (sa < 0) sa = -sa;
if (sa < bl) return (0);
if (a[bl] & wh) return (1);
return (0);
}
void
_ntl_zlowbits(
_ntl_verylong a,
long b,
_ntl_verylong *cc
)
{
_ntl_verylong c;
long bl;
long wh;
long sa;
if (_ntl_ziszero(a) || (b<=0)) {
_ntl_zzero(cc);
return;
}
bl = b/NTL_NBITS;
wh = b - NTL_NBITS*bl;
if (wh != 0)
bl++;
else
wh = NTL_NBITS;
sa = a[0];
if (sa < 0) sa = -sa;
if (sa < bl) {
_ntl_zcopy(a,cc);
_ntl_zabs(cc);
return;
}
c = *cc;
/* a won't move if c aliases a */
_ntl_zsetlength(&c, bl);
*cc = c;
for (sa=1; sa<bl; sa++)
c[sa] = a[sa];
c[bl] = a[bl]&((1L<<wh)-1);
while ((bl>1) && (!c[bl]))
bl --;
c[0] = bl;
}
long _ntl_zslowbits(_ntl_verylong a, long p)
{
static _ntl_verylong x = 0;
if (p > NTL_BITS_PER_LONG)
p = NTL_BITS_PER_LONG;
_ntl_zlowbits(a, p, &x);
return _ntl_ztoint(x);
}
long
_ntl_zweights(
long aa
)
{
unsigned long a;
long res = 0;
if (aa < 0)
a = - ((unsigned long) aa);
else
a = aa;
while (a) {
if (a & 1) res ++;
a >>= 1;
}
return (res);
}
long
_ntl_zweight(
_ntl_verylong a
)
{
long i;
long res = 0;
if (!a) return (0);
i = a[0];
if (i<0) i = -i;
for (;i;i--)
res += _ntl_zweights(a[i]);
return (res);
}
void
_ntl_zand(
_ntl_verylong a,
_ntl_verylong b,
_ntl_verylong *cc
)
{
_ntl_verylong c;
long sa;
long sb;
long sm;
long a_alias;
long b_alias;
if (_ntl_ziszero(a) || _ntl_ziszero(b)) {
_ntl_zzero(cc);
return;
}
c = *cc;
a_alias = (a == c);
b_alias = (b == c);
sa = a[0];
if (sa < 0) sa = -sa;
sb = b[0];
if (sb < 0) sb = -sb;
sm = (sa > sb ? sb : sa );
_ntl_zsetlength(&c, sm);
if (a_alias) a = c;
if (b_alias) b = c;
*cc = c;
for (sa = 1; sa <= sm; sa ++)
c[sa] = a[sa] & b[sa];
while ((sm > 1) && (!(c[sm])))
sm --;
c[0] = sm;
}
void
_ntl_zxor(
_ntl_verylong a,
_ntl_verylong b,
_ntl_verylong *cc
)
{
_ntl_verylong c;
long sa;
long sb;
long sm;
long la;
long i;
long a_alias;
long b_alias;
if (_ntl_ziszero(a)) {
_ntl_zcopy(b,cc);
_ntl_zabs(cc);
return;
}
if (_ntl_ziszero(b)) {
_ntl_zcopy(a,cc);
_ntl_zabs(cc);
return;
}
c = *cc;
a_alias = (a == c);
b_alias = (b == c);
sa = a[0];
if (sa < 0) sa = -sa;
sb = b[0];
if (sb < 0) sb = -sb;
if (sa > sb) {
la = sa;
sm = sb;
} else {
la = sb;
sm = sa;
}
_ntl_zsetlength(&c, la);
if (a_alias) a = c;
if (b_alias) b = c;
*cc = c;
for (i = 1; i <= sm; i ++)
c[i] = a[i] ^ b[i];
if (sa > sb)
for (;i <= la; i++) c[i] = a[i];
else
for (;i <= la; i++) c[i] = b[i];
while ((la > 1) && (!(c[la])))
la --;
c[0] = la;
}
void
_ntl_zor(
_ntl_verylong a,
_ntl_verylong b,
_ntl_verylong *cc
)
{
_ntl_verylong c;
long sa;
long sb;
long sm;
long la;
long i;
long a_alias;
long b_alias;
if (_ntl_ziszero(a)) {
_ntl_zcopy(b,cc);
_ntl_zabs(cc);
return;
}
if (_ntl_ziszero(b)) {
_ntl_zcopy(a,cc);
_ntl_zabs(cc);
return;
}
c = *cc;
a_alias = (a == c);
b_alias = (b == c);
sa = a[0];
if (sa < 0) sa = -sa;
sb = b[0];
if (sb < 0) sb = -sb;
if (sa > sb) {
la = sa;
sm = sb;
} else {
la = sb;
sm = sa;
}
_ntl_zsetlength(&c, la);
if (a_alias) a = c;
if (b_alias) b = c;
*cc = c;
for (i = 1; i <= sm; i ++)
c[i] = a[i] | b[i];
if (sa > sb)
for (;i <= la; i++) c[i] = a[i];
else
for (;i <= la; i++) c[i] = b[i];
c[0] = la;
}
long
_ntl_zsetbit(
_ntl_verylong *a,
long b
)
{
long bl;
long wh;
long sa;
if (b<0) zhalt("_ntl_zsetbit: negative index");
if (_ntl_ziszero(*a)) {
_ntl_zintoz(1,a);
_ntl_zlshift(*a,b,a);
return (0);
}
bl = (b/NTL_NBITS);
wh = 1L << (b - NTL_NBITS*bl);
bl ++;
sa = (*a)[0];
if (sa<0) sa = -sa;
if (sa >= bl) {
sa = (*a)[bl] & wh;
(*a)[bl] |= wh;
if (sa) return (1);
return (0);
} else {
_ntl_zsetlength(a,bl);
sa ++;
for (;sa<=bl;sa++) (*a)[sa]=0;
if ((*a)[0] < 0)
(*a)[0] = -bl;
else (*a)[0] = bl;
(*a)[bl] |= wh;
return (0);
}
}
long
_ntl_zswitchbit(
_ntl_verylong *a,
long p
)
{
long bl;
long wh;
long sa;
if (p < 0) zhalt("_ntl_zswitchbit: negative index");
if (_ntl_ziszero(*a)) {
_ntl_zintoz(1,a);
_ntl_zlshift(*a,p,a);
return (0);
}
bl = (p/NTL_NBITS);
wh = 1L << (p - NTL_NBITS*bl);
bl ++;
sa = (*a)[0];
if (sa < 0) sa = -sa;
if ((sa < bl) || (!((*a)[bl] & wh))) {
_ntl_zsetbit(a,p);
return (0);
}
(*a)[bl] ^= wh;
while ((sa>1) && (!(*a)[sa]))
sa --;
if ((*a)[0] > 0) (*a)[0] = sa;
else (*a)[0] = -sa;
return (1);
}
long _ntl_zsize(_ntl_verylong rep)
{
if (!rep || (rep[0] == 1 && rep[1] == 0))
return 0;
else if (rep[0] < 0)
return -rep[0];
else
return rep[0];
}
long _ntl_zdigit(_ntl_verylong rep, long i)
{
long sa;
if (i < 0 || !rep) return 0;
sa = rep[0];
if (sa < 0) sa = -sa;
if (i >= sa) return 0;
return rep[i+1];
}
long _ntl_zisone(_ntl_verylong rep)
{
return rep != 0 && rep[0] == 1 && rep[1] == 1;
}
long _ntl_zsptest(_ntl_verylong rep)
{
return !rep || rep[0] == 1 || rep[0] == -1;
}
long _ntl_zwsptest(_ntl_verylong rep)
{
return !rep || rep[0] == 1 || rep[0] == -1;
}
long _ntl_zcrtinrange(_ntl_verylong g, _ntl_verylong a)
{
long sa, sg, carry, i, diff;
if (!a || a[0] < 0 || (a[0] == 1 && a[1] == 0)) return 0;
sa = a[0];
if (!g) return 1;
sg = g[0];
if (sg == 1 && g[1] == 0) return 1;
if (sg < 0) sg = -sg;
if (sa-sg > 1) return 1;
if (sa-sg < 0) return 0;
carry=0;
if (sa-sg == 1) {
if (a[sa] > 1) return 1;
carry = 1;
}
i = sg;
diff = 0;
while (i > 0 && diff == 0) {
diff = (carry << (NTL_NBITS-1)) + (a[i] >> 1) - g[i];
carry = (a[i] & 1);
i--;
}
if (diff == 0) {
if (carry) return 1;
return (g[0] > 0);
}
else
return (diff > 0);
}
void _ntl_zfrombytes(_ntl_verylong *x, const unsigned char *p, long n)
{
long sz;
long i;
_ntl_verylong a;
long bitpos, wordpos, bitoffset, diff;
if (n <= 0) {
_ntl_zzero(x);
return;
}
if (n > (NTL_MAX_LONG-(NTL_NBITS-1))/8)
zhalt("ZZFromBytes: excessive length");
sz = (n*8 + NTL_NBITS-1)/NTL_NBITS;
_ntl_zsetlength(x, sz);
a = *x;
for (i = 1; i <= sz; i++)
a[i] = 0;
for (i = 0; i < n; i++) {
bitpos = i*8;
wordpos = bitpos/NTL_NBITS;
bitoffset = bitpos - wordpos*NTL_NBITS;
diff = NTL_NBITS-bitoffset;
if (diff < 8) {
a[wordpos+1] |=
((( ((unsigned long)(p[i])) & 255UL ) << bitoffset) & NTL_RADIXM);
a[wordpos+2] = ( ((long)(p[i])) & 255 ) >> diff;
}
else {
a[wordpos+1] |= (( ((long)(p[i])) & 255 ) << bitoffset);
}
}
while (sz > 1 && a[sz] == 0) sz--;
a[0] = sz;
}
void _ntl_zbytesfromz(unsigned char *p, _ntl_verylong a, long nn)
{
long k = _ntl_z2log(a);
long n = (k+7)/8;
long sz = _ntl_zsize(a);
long min_n = ((n < nn) ? n : nn);
long i;
for (i = 0; i < min_n; i++) {
long bitpos = i*8;
long wordpos = bitpos/NTL_NBITS;
long bitoffset = bitpos - wordpos*NTL_NBITS;
long diff;
p[i] = (a[wordpos+1] >> bitoffset) & 255;
diff = NTL_NBITS - bitoffset;
if (diff < 8 && wordpos < sz-1) {
long msk = (1L << (8-diff))-1;
p[i] |= ((a[wordpos+2] & msk) << diff);
}
}
for (i = min_n; i < nn; i++)
p[i] = 0;
}
long _ntl_zblock_construct_alloc(_ntl_verylong *x, long d, long n)
{
long nwords, nbytes, AllocAmt, m, *p, *q, j;
/* check n value */
if (n <= 0)
zhalt("block construct: n must be positive");
/* check d value */
if (d <= 0)
zhalt("block construct: d must be positive");
if (NTL_OVERFLOW(d, NTL_NBITS, NTL_NBITS) ||
NTL_OVERFLOW(d, sizeof(long), 3*sizeof(long)))
zhalt("block construct: d too large");
#ifdef L_TO_G_CHECK_LEN
/* this makes sure that numbers don't get too big for GMP */
if (d >= (1L << (NTL_BITS_PER_INT-4)))
zhalt("size too big for GMP");
#endif
nwords = d + 3;
nbytes = nwords*sizeof(long);
AllocAmt = (NTL_MAX_ALLOC_BLOCK - sizeof(long)) / nbytes;
if (AllocAmt == 0) AllocAmt = 1;
if (AllocAmt < n)
m = AllocAmt;
else
m = n;
p = (long *) NTL_MALLOC(m, nbytes, sizeof(long));
if (!p) zhalt("out of memory in block construct");
*p = m;
q = p+2;
*x = q;
for (j = 0; j < m; j++) {
q[-1] = ((d+1) << 1) | 1;
q[0] = 1;
q[1] = 0;
q += nwords;
}
return m;
}
void _ntl_zblock_construct_set(_ntl_verylong x, _ntl_verylong *y, long i)
{
long d, size;
d = (x[-1] >> 1) - 1;
size = d + 3;
*y = x + i*size;
}
long _ntl_zblock_destroy(_ntl_verylong x)
{
long m, *p;
p = x - 2;
m = *p;
free(p);
return m;
}
long _ntl_zblock_storage(long d)
{
long size = d+3;
return size * (sizeof (long)) + sizeof(_ntl_verylong);
}
/* The following routines provide special support for ZZ_pX
* arithmetic. */
/* this is a generic single-precision mul mod that will work
* on any platform */
#define SP_MUL_MOD(r, a, b, n) \
{ \
long l__a = (a); \
long l__b = (b); \
long l__n = (n); \
long l__q; \
unsigned long l__res; \
\
l__q = (long) ((((double) l__a) * ((double) l__b)) / ((double) l__n)); \
l__res = ((unsigned long) l__a)*((unsigned long) l__b) - \
((unsigned long) l__q)*((unsigned long) l__n); \
if (l__res >> (NTL_BITS_PER_LONG-1)) \
l__res += l__n; \
else if (((long) l__res) >= l__n) \
l__res -= l__n; \
\
r = (long) l__res; \
}
static
void sp_ext_eucl(long *dd, long *ss, long *tt, long a, long b)
{
long u, v, u0, v0, u1, v1, u2, v2, q, r;
long aneg = 0, bneg = 0;
if (a < 0) {
if (a < -NTL_MAX_LONG) zhalt("integer overflow");
a = -a;
aneg = 1;
}
if (b < 0) {
if (b < -NTL_MAX_LONG) zhalt("integer overflow");
b = -b;
bneg = 1;
}
u1=1; v1=0;
u2=0; v2=1;
u = a; v = b;
while (v != 0) {
q = u / v;
r = u % v;
u = v;
v = r;
u0 = u2;
v0 = v2;
u2 = u1 - q*u2;
v2 = v1- q*v2;
u1 = u0;
v1 = v0;
}
if (aneg)
u1 = -u1;
if (bneg)
v1 = -v1;
*dd = u;
*ss = u1;
*tt = v1;
}
static
long sp_inv_mod(long a, long n)
{
long d, s, t;
sp_ext_eucl(&d, &s, &t, a, n);
if (d != 1) zhalt("inverse undefined");
if (s < 0)
return s + n;
else
return s;
}
/* Data structures and algorithms for fast Chinese Remaindering */
struct crt_body_lip {
_ntl_verylong *v;
long sbuf;
long n;
};
#ifdef NTL_GMP_HACK
struct crt_body_gmp {
MP_INT *v;
long sbuf;
long n;
mpz_t buf;
};
struct crt_body_gmp1 {
long n;
long levels;
long *primes;
long *inv_vec;
long *val_vec;
long *index_vec;
MP_INT *prod_vec;
MP_INT *rem_vec;
MP_INT *coeff_vec;
MP_INT temps[2];
mpz_t modulus;
};
#endif
struct crt_body {
long strategy;
union {
struct crt_body_lip L;
#ifdef NTL_GMP_HACK
struct crt_body_gmp G;
struct crt_body_gmp1 G1;
#endif
} U;
};
long _ntl_crt_struct_special(void *crt_struct)
{
struct crt_body *c = (struct crt_body *) crt_struct;
return (c->strategy == 2);
}
void _ntl_crt_struct_init(void **crt_struct, long n, _ntl_verylong p,
const long *primes)
{
struct crt_body *c;
c = (struct crt_body *) NTL_MALLOC(1, sizeof(struct crt_body), 0);
if (!c) zhalt("out of memory");
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && n >= 600) {
struct crt_body_gmp1 *C = &c->U.G1;
long *q;
long i, j;
long levels, vec_len;
long *val_vec, *inv_vec;
long *index_vec;
MP_INT *prod_vec, *rem_vec, *coeff_vec;
MP_INT *temps;
mpz_init(C->modulus);
lip_to_mpz(p, C->modulus);
temps = &C->temps[0];
mpz_init(&temps[0]);
mpz_init(&temps[1]);
q = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!q) zhalt("out of memory");
val_vec = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!val_vec) zhalt("out of memory");
inv_vec = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!inv_vec) zhalt("out of memory");
for (i = 0; i < n; i++)
q[i] = primes[i];
levels = 0;
while ((n >> levels) >= 16) levels++;
vec_len = (1L << levels) - 1;
index_vec = (long *) NTL_MALLOC((vec_len+1), sizeof(long), 0);
if (!index_vec) zhalt("out of memory");
prod_vec = (MP_INT *) NTL_MALLOC(vec_len, sizeof(MP_INT), 0);
if (!prod_vec) zhalt("out of memory");
rem_vec = (MP_INT *) NTL_MALLOC(vec_len, sizeof(MP_INT), 0);
if (!rem_vec) zhalt("out of memory");
coeff_vec = (MP_INT *) NTL_MALLOC(n, sizeof(MP_INT), 0);
if (!coeff_vec) zhalt("out of memory");
for (i = 0; i < vec_len; i++)
mpz_init(&prod_vec[i]);
for (i = 0; i < vec_len; i++)
mpz_init(&rem_vec[i]);
for (i = 0; i < n; i++)
mpz_init(&coeff_vec[i]);
index_vec[0] = 0;
index_vec[1] = n;
for (i = 0; i <= levels-2; i++) {
long start = (1L << i) - 1;
long finish = (1L << (i+1)) - 2;
for (j = finish; j >= start; j--) {
index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2;
index_vec[2*j+1] = index_vec[j];
}
index_vec[2*finish+3] = n;
}
for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
/* multiply primes index_vec[i]..index_vec[i+1]-1 into
* prod_vec[i]
*/
mpz_set_ui(&prod_vec[i], 1);
for (j = index_vec[i]; j < index_vec[i+1]; j++)
mpz_mul_ui(&prod_vec[i], &prod_vec[i], q[j]);
}
for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
for (j = index_vec[i]; j < index_vec[i+1]; j++)
mpz_fdiv_q_ui(&coeff_vec[j], &prod_vec[i], q[j]);
}
for (i = (1L << (levels-1)) - 2; i >= 0; i--)
mpz_mul(&prod_vec[i], &prod_vec[2*i+1], &prod_vec[2*i+2]);
/* the following is asymptotically the bottleneck...but it
* it probably doesn't matter. */
for (i = 0; i < n; i++) {
long tt;
mpz_fdiv_q_ui(&temps[0], &prod_vec[0], q[i]);
tt = mpn_mod_1(temps[0]._mp_d, temps[0]._mp_size, q[i]);
inv_vec[i] = sp_inv_mod(tt, q[i]);
}
c->strategy = 2;
C->n = n;
C->primes = q;
C->val_vec = val_vec;
C->inv_vec = inv_vec;
C->levels = levels;
C->index_vec = index_vec;
C->prod_vec = prod_vec;
C->rem_vec = rem_vec;
C->coeff_vec = coeff_vec;
*crt_struct = (void *) c;
return;
}
if (_ntl_gmp_hack) {
struct crt_body_gmp *C = &c->U.G;
long i;
c->strategy = 1;
C->n = n;
C->v = (MP_INT *) NTL_MALLOC(n, sizeof(MP_INT), 0);
if (!C->v) zhalt("out of memory");
for (i = 0; i < n; i++)
mpz_init(&C->v[i]);
C->sbuf = L_TO_G(p[0])+2;
mpz_init(C->buf);
_mpz_realloc(C->buf, C->sbuf);
*crt_struct = (void *) c;
return;
}
#endif
{
struct crt_body_lip *C = &c->U.L;
long i;
c->strategy = 0;
C->n = n;
C->v = (_ntl_verylong *) NTL_MALLOC(n, sizeof(_ntl_verylong), 0);
if (!C->v) zhalt("out of memory");
for (i = 0; i < n; i++)
C->v[i] = 0;
C->sbuf = p[0]+3;
*crt_struct = (void *) c;
}
}
void _ntl_crt_struct_insert(void *crt_struct, long i, _ntl_verylong m)
{
struct crt_body *c = (struct crt_body *) crt_struct;
switch (c->strategy) {
case 0: {
_ntl_zcopy(m, &c->U.L.v[i]);
break;
}
#ifdef NTL_GMP_HACK
case 1: {
lip_to_mpz(m, &c->U.G.v[i]);
break;
}
#endif
default:
zhalt("_ntl_crt_struct_insert: inconsistent strategy");
} /* end switch */
}
void _ntl_crt_struct_free(void *crt_struct)
{
struct crt_body *c = (struct crt_body *) crt_struct;
switch (c->strategy) {
case 0: {
struct crt_body_lip *C = &c->U.L;
long i, n;
n = C->n;
for (i = 0; i < n; i++)
_ntl_zfree(&C->v[i]);
free(C->v);
free(c);
break;
}
#ifdef NTL_GMP_HACK
case 1: {
struct crt_body_gmp *C = &c->U.G;
long i, n;
n = C->n;
for (i = 0; i < n; i++)
mpz_clear(&C->v[i]);
mpz_clear(C->buf);
free(C->v);
free(c);
break;
}
case 2: {
struct crt_body_gmp1 *C = &c->U.G1;
long n = C->n;
long levels = C->levels;
long *primes = C->primes;
long *inv_vec = C->inv_vec;
long *val_vec = C->val_vec;
long *index_vec = C->index_vec;
MP_INT *prod_vec = C->prod_vec;
MP_INT *rem_vec = C->rem_vec;
MP_INT *coeff_vec = C->coeff_vec;
MP_INT *temps = C->temps;
MP_INT *modulus = C->modulus;
long vec_len = (1L << levels) - 1;
long i;
for (i = 0; i < vec_len; i++)
mpz_clear(&prod_vec[i]);
for (i = 0; i < vec_len; i++)
mpz_clear(&rem_vec[i]);
for (i = 0; i < n; i++)
mpz_clear(&coeff_vec[i]);
mpz_clear(&temps[0]);
mpz_clear(&temps[1]);
mpz_clear(modulus);
free(primes);
free(inv_vec);
free(val_vec);
free(index_vec);
free(prod_vec);
free(rem_vec);
free(coeff_vec);
free(c);
break;
}
#endif
default:
zhalt("_ntl_crt_struct_free: inconsistent strategy");
} /* end case */
}
#ifdef NTL_GMP_HACK
static
void mpz_add_mul_many(MP_INT *res, MP_INT *a, long *b,
long n, long sz)
{
mp_limb_t *xx, *yy;
long i, sx;
long sy;
mp_limb_t carry;
sx = sz + 2;
if (sx > res->_mp_alloc)
_mpz_realloc(res, sx);
xx = res->_mp_d;
for (i = 0; i < sx; i++)
xx[i] = 0;
for (i = 0; i < n; i++) {
yy = a[i]._mp_d;
sy = a[i]._mp_size;
if (!sy || !b[i]) continue;
carry = mpn_addmul_1(xx, yy, sy, b[i]);
yy = xx + sy;
*yy += carry;
if (*yy < carry) { /* unsigned comparison! */
do {
yy++;
*yy += 1;
} while (*yy == 0);
}
}
while (sx > 0 && xx[sx-1] == 0) sx--;
res->_mp_size = sx;
}
#endif
void _ntl_crt_struct_eval(void *crt_struct, _ntl_verylong *x, const long *b)
{
struct crt_body *c = (struct crt_body *) crt_struct;
switch (c->strategy) {
case 0: {
struct crt_body_lip *C = &c->U.L;
_ntl_verylong xx, yy, *a;
long i, sx, n;
n = C->n;
sx = C->sbuf;
_ntl_zsetlength(x, sx);
xx = *x;
a = C->v;
for (i = 1; i <= sx; i++)
xx[i] = 0;
xx++;
for (i = 0; i < n; i++) {
yy = a[i];
if (!yy || !b[i]) continue;
zaddmul(b[i], xx, yy);
yy = xx + yy[0];
if ((*yy) >= NTL_RADIX) {
(*yy) -= NTL_RADIX;
yy++;
while ((*yy) == NTL_RADIX-1) {
*yy = 0;
yy++;
}
(*yy)++;
}
}
xx--;
while (sx > 1 && xx[sx] == 0) sx--;
xx[0] = sx;
break;
}
#ifdef NTL_GMP_HACK
case 1: {
struct crt_body_gmp *C = &c->U.G;
mp_limb_t *xx, *yy;
MP_INT *a;
long i, sx, n;
long sy;
mp_limb_t carry;
n = C->n;
sx = C->sbuf;
xx = C->buf->_mp_d;
for (i = 0; i < sx; i++)
xx[i] = 0;
a = C->v;
for (i = 0; i < n; i++) {
yy = a[i]._mp_d;
sy = a[i]._mp_size;
if (!sy || !b[i]) continue;
carry = mpn_addmul_1(xx, yy, sy, b[i]);
yy = xx + sy;
*yy += carry;
if (*yy < carry) { /* unsigned comparison! */
do {
yy++;
*yy += 1;
} while (*yy == 0);
}
}
while (sx > 0 && xx[sx-1] == 0) sx--;
C->buf->_mp_size = sx;
mpz_to_lip(x, C->buf);
break;
}
case 2: {
struct crt_body_gmp1 *C = &c->U.G1;
long n = C->n;
long levels = C->levels;
long *primes = C->primes;
long *inv_vec = C->inv_vec;
long *val_vec = C->val_vec;
long *index_vec = C->index_vec;
MP_INT *prod_vec = C->prod_vec;
MP_INT *rem_vec = C->rem_vec;
MP_INT *coeff_vec = C->coeff_vec;
MP_INT *temps = C->temps;
long vec_len = (1L << levels) - 1;
long i, j;
for (i = 0; i < n; i++) {
SP_MUL_MOD(val_vec[i], b[i], inv_vec[i], primes[i]);
}
for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
long j1 = index_vec[i];
long j2 = index_vec[i+1];
mpz_add_mul_many(&rem_vec[i], &coeff_vec[j1], &val_vec[j1], j2-j1,
prod_vec[i]._mp_size);
}
for (i = (1L << (levels-1)) - 2; i >= 0; i--) {
mpz_mul(&temps[0], &prod_vec[2*i+1], &rem_vec[2*i+2]);
mpz_mul(&temps[1], &rem_vec[2*i+1], &prod_vec[2*i+2]);
mpz_add(&rem_vec[i], &temps[0], &temps[1]);
}
/* temps[0] = rem_vec[0] mod prod_vec[0] (least absolute residue) */
mpz_fdiv_r(&temps[0], &rem_vec[0], &prod_vec[0]);
mpz_sub(&temps[1], &temps[0], &prod_vec[0]);
if (mpz_cmpabs(&temps[0], &temps[1]) > 0)
mpz_set(&temps[0], &temps[1]);
mpz_fdiv_r(&temps[1], &temps[0], C->modulus);
mpz_to_lip(x, &temps[1]);
break;
}
#endif
default:
zhalt("_crt_struct_eval: inconsistent strategy");
} /* end case */
}
/* Data structures and algorithms for multi-modulus remaindering */
struct rem_body_lip {
long n;
long *primes;
};
#ifdef NTL_GMP_HACK
struct rem_body_gmp {
long n;
long levels;
long *primes;
long *index_vec;
MP_INT *prod_vec;
MP_INT *rem_vec;
};
#endif
#ifdef NTL_SINGLE_MUL
struct rem_body_sm {
long n;
long *primes;
double **tbl;
};
#endif
#if (defined(NTL_TBL_REM) && !defined(NTL_SINGLE_MUL))
struct rem_body_tbl {
long n;
long *primes;
long **tbl;
};
#endif
struct rem_body {
long strategy;
union {
struct rem_body_lip L;
#ifdef NTL_GMP_HACK
struct rem_body_gmp G;
#endif
#ifdef NTL_SINGLE_MUL
struct rem_body_sm S;
#endif
#if (defined(NTL_TBL_REM) && !defined(NTL_SINGLE_MUL))
struct rem_body_tbl T;
#endif
} U;
};
void _ntl_rem_struct_init(void **rem_struct, long n, _ntl_verylong modulus,
const long *p)
{
struct rem_body *r;
r = (struct rem_body *) NTL_MALLOC(1, sizeof(struct rem_body), 0);
if (!r) zhalt("out of memory");
#ifdef NTL_GMP_HACK
if (_ntl_gmp_hack && n >= 32) {
struct rem_body_gmp *R = &r->U.G;
long *q;
long i, j;
long levels, vec_len;
long *index_vec;
MP_INT *prod_vec, *rem_vec;
q = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!q) zhalt("out of memory");
for (i = 0; i < n; i++)
q[i] = p[i];
levels = 0;
while ((n >> levels) >= 4) levels++;
vec_len = (1L << levels) - 1;
index_vec = (long *) NTL_MALLOC((vec_len+1), sizeof(long), 0);
if (!index_vec) zhalt("out of memory");
prod_vec = (MP_INT *) NTL_MALLOC(vec_len, sizeof(MP_INT), 0);
if (!prod_vec) zhalt("out of memory");
rem_vec = (MP_INT *) NTL_MALLOC(vec_len, sizeof(MP_INT), 0);
if (!rem_vec) zhalt("out of memory");
for (i = 0; i < vec_len; i++)
mpz_init(&prod_vec[i]);
for (i = 0; i < vec_len; i++)
mpz_init(&rem_vec[i]);
index_vec[0] = 0;
index_vec[1] = n;
for (i = 0; i <= levels-2; i++) {
long start = (1L << i) - 1;
long finish = (1L << (i+1)) - 2;
for (j = finish; j >= start; j--) {
index_vec[2*j+2] = index_vec[j] + (index_vec[j+1] - index_vec[j])/2;
index_vec[2*j+1] = index_vec[j];
}
index_vec[2*finish+3] = n;
}
for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
/* multiply primes index_vec[i]..index_vec[i+1]-1 into
* prod_vec[i]
*/
mpz_set_ui(&prod_vec[i], 1);
for (j = index_vec[i]; j < index_vec[i+1]; j++)
mpz_mul_ui(&prod_vec[i], &prod_vec[i], q[j]);
}
for (i = (1L << (levels-1)) - 2; i >= 3; i--)
mpz_mul(&prod_vec[i], &prod_vec[2*i+1], &prod_vec[2*i+2]);
r->strategy = 1;
R->n = n;
R->primes = q;
R->levels = levels;
R->index_vec = index_vec;
R->prod_vec = prod_vec;
R->rem_vec = rem_vec;
*rem_struct = (void *) r;
return;
}
#endif
#ifdef NTL_SINGLE_MUL
{
struct rem_body_sm *R = &r->U.S;
long *qq;
long i;
double **tbl;
long t, t1, j, q;
long sz = modulus[0];
r->strategy = 2;
R->n = n;
qq = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!qq) zhalt("out of memory");
R->primes = qq;
for (i = 0; i < n; i++)
qq[i] = p[i];
tbl = (double **) NTL_MALLOC(n, sizeof(double *), 0);
if (!tbl) zhalt("out of space");
for (i = 0; i < n; i++) {
tbl[i] = (double *) NTL_MALLOC(sz, sizeof(double), 0);
if (!tbl[i]) zhalt("out of space");
}
R->tbl = tbl;
for (i = 0; i < n; i++) {
q = qq[i];
t = (((long)1) << NTL_NBITS) % q;
t1 = 1;
tbl[i][0] = (double) 1;
for (j = 1; j < sz; j++) {
SP_MUL_MOD(t1, t1, t, q);
tbl[i][j] = (double) t1;
}
}
*rem_struct = (void *) r;
return;
}
#endif
#if (defined(NTL_TBL_REM) && !defined(NTL_SINGLE_MUL))
{
struct rem_body_tbl *R = &r->U.T;
long *qq;
long i;
long **tbl;
long t, t1, j, q;
long sz = modulus[0];
r->strategy = 3;
R->n = n;
qq = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!qq) zhalt("out of memory");
R->primes = qq;
for (i = 0; i < n; i++)
qq[i] = p[i];
tbl = (long **) NTL_MALLOC(n, sizeof(long *), 0);
if (!tbl) zhalt("out of space");
for (i = 0; i < n; i++) {
tbl[i] = (long *) NTL_MALLOC(sz, sizeof(long), 0);
if (!tbl[i]) zhalt("out of space");
}
R->tbl = tbl;
for (i = 0; i < n; i++) {
q = qq[i];
t = (((long)1) << NTL_NBITS) % q;
t1 = 1;
tbl[i][0] = 1;
for (j = 1; j < sz; j++) {
SP_MUL_MOD(t1, t1, t, q);
tbl[i][j] = t1;
}
}
*rem_struct = (void *) r;
return;
}
#endif
{
struct rem_body_lip *R = &r->U.L;
long *q;
long i;
r->strategy = 0;
R->n = n;
q = (long *) NTL_MALLOC(n, sizeof(long), 0);
if (!q) zhalt("out of memory");
R->primes = q;
for (i = 0; i < n; i++)
q[i] = p[i];
*rem_struct = (void *) r;
}
}
void _ntl_rem_struct_free(void *rem_struct)
{
struct rem_body *r = (struct rem_body *) rem_struct;
switch (r->strategy) {
case 0: {
free(r->U.L.primes);
free(r);
break;
}
#ifdef NTL_GMP_HACK
case 1: {
struct rem_body_gmp *R = &r->U.G;
long levels = R->levels;
long vec_len = (1L << levels) - 1;
long i;
for (i = 0; i < vec_len; i++)
mpz_clear(&R->prod_vec[i]);
for (i = 0; i < vec_len; i++)
mpz_clear(&R->rem_vec[i]);
free(R->primes);
free(R->index_vec);
free(R->prod_vec);
free(R->rem_vec);
free(r);
break;
}
#endif
#ifdef NTL_SINGLE_MUL
case 2: {
struct rem_body_sm *R = &r->U.S;
long n = R->n;
double **tbl = R->tbl;
long i;
for (i = 0; i < n; i++)
free(tbl[i]);
free(tbl);
free(R->primes);
free(r);
break;
}
#endif
#if (defined(NTL_TBL_REM) && !defined(NTL_SINGLE_MUL))
case 3: {
struct rem_body_tbl *R = &r->U.T;
long n = R->n;
long **tbl = R->tbl;
long i;
for (i = 0; i < n; i++)
free(tbl[i]);
free(tbl);
free(R->primes);
free(r);
break;
}
#endif
default:
zhalt("_ntl_rem_struct_free: inconsistent strategy");
} /* end switch */
}
void _ntl_rem_struct_eval(void *rem_struct, long *x, _ntl_verylong a)
{
struct rem_body *r = (struct rem_body *) rem_struct;
switch (r->strategy) {
case 0: {
struct rem_body_lip *R = &r->U.L;
_ntl_zmultirem(a, R->n, R->primes, x);
return;
}
#ifdef NTL_GMP_HACK
case 1: {
struct rem_body_gmp *R = &r->U.G;
long n = R->n;
long levels = R->levels;
long *q = R->primes;
long *index_vec = R->index_vec;
MP_INT *prod_vec = R->prod_vec;
MP_INT *rem_vec = R->rem_vec;
long vec_len = (1L << levels) - 1;
long i, j;
lip_to_mpz(a, &rem_vec[1]);
mpz_set(&rem_vec[2], &rem_vec[1]);
for (i = 1; i < (1L << (levels-1)) - 1; i++) {
mpz_mod(&rem_vec[2*i+1], &rem_vec[i], &prod_vec[2*i+1]);
mpz_mod(&rem_vec[2*i+2], &rem_vec[i], &prod_vec[2*i+2]);
}
for (i = (1L << (levels-1)) - 1; i < vec_len; i++) {
long lo = index_vec[i];
long hi = index_vec[i+1];
mp_limb_t *s1p = rem_vec[i]._mp_d;
long s1size = rem_vec[i]._mp_size;
if (s1size == 0) {
for (j = lo; j <hi; j++)
x[j] = 0;
}
else {
for (j = lo; j < hi; j++)
x[j] = mpn_mod_1(s1p, s1size, q[j]);
}
}
break;
}
#endif
#ifdef NTL_SINGLE_MUL
case 2: {
struct rem_body_sm *R = &r->U.S;
_ntl_zmultirem2(a, R->n, R->primes, R->tbl, x);
break;
}
#endif
#if (defined(NTL_TBL_REM) && !defined(NTL_SINGLE_MUL))
case 3: {
struct rem_body_tbl *R = &r->U.T;
_ntl_zmultirem3(a, R->n, R->primes, R->tbl, x);
break;
}
#endif
default:
zhalt("_ntl_rem_struct_eval: inconsistent strategy");
} /* end switch */
}
syntax highlighted by Code2HTML, v. 0.9.1