#include <NTL/ZZXFactoring.h>
#include <NTL/lzz_pXFactoring.h>
#include <NTL/vec_vec_long.h>
#include <NTL/vec_vec_ulong.h>
#include <NTL/vec_double.h>

#include <NTL/LLL.h>

#include <NTL/new.h>

NTL_START_IMPL

long ZZXFac_van_Hoeij = 1;

static
long ok_to_abandon = 0;

struct LocalInfoT {
   long n;
   long NumPrimes;
   long NumFactors;
   vec_long p;
   vec_vec_long pattern;
   ZZ PossibleDegrees;
   PrimeSeq s;
};



static
void mul(ZZ_pX& x, vec_ZZ_pX& a)
// this performs multiplications in close-to-optimal order,
// and kills a in the process
{
   long n = a.length();

   // first, deal with some trivial cases

   if (n == 0) {
      set(x);
      a.kill();
      return;
   }
   else if (n == 1) {
      x = a[0];
      a.kill();
      return;
   }

   long i, j;

   // assume n > 1 and all a[i]'s are nonzero

   // sort into non-increasing degrees

   for (i = 1; i <= n - 1; i++)
      for (j = 0; j <= n - i - 1; j++)
         if (deg(a[j]) < deg(a[j+1]))
            swap(a[j], a[j+1]);

   ZZ_pX g;

   while (n > 1) {
      // replace smallest two poly's by their product
      mul(g, a[n-2], a[n-1]);
      a[n-2].kill();
      a[n-1].kill();
      swap(g, a[n-2]);
      n--;

      // re-establish order

      i = n-1;
      while (i > 0 && deg(a[i-1]) < deg(a[i])) {
         swap(a[i-1], a[i]);
         i--;
      }
   }

   x = a[0];

   a[0].kill();
   a.SetLength(0);
}


void mul(ZZX& x, const vec_pair_ZZX_long& a)
{
   long l = a.length();
   ZZX res;
   long i, j;

   set(res);
   for (i = 0; i < l; i++)
      for (j = 0; j < a[i].b; j++)
         mul(res, res, a[i].a);

   x = res;
}


void SquareFreeDecomp(vec_pair_ZZX_long& u, const ZZX& ff)
// input is primitive 
{
   ZZX f = ff;

   ZZX d, v, w, s, t1;
   long i;

   u.SetLength(0);

   if (deg(f) <= 0)
      return;

   diff(t1, f);
   GCD(d, f, t1);

   if (deg(d) == 0) {
      append(u, cons(f, 1));
      return;
   }

   divide(v, f, d); 
   divide(w, t1, d);
   i = 0;

   for (;;) {
      i = i + 1;

      diff(t1, v);
      sub(s, w, t1);

      if (IsZero(s)) {
         if (deg(v) != 0) append(u, cons(v, i));
         return;
      }

      GCD(d, v, s);
      divide(v, v, d);
      divide(w, s, d);

      if (deg(d) != 0) append(u, cons(d, i));
   }
}




static
void HenselLift(ZZX& Gout, ZZX& Hout, ZZX& Aout, ZZX& Bout,
                const ZZX& f, const ZZX& g, const ZZX& h,
                const ZZX& a, const ZZX& b, const ZZ& p) 
{
   ZZX c, g1, h1, G, H, A, B;

   mul(c, g, h);
   sub(c, f, c);

   if (!divide(c, c, p))
      Error("inexact division");

   ZZ_pX cc, gg, hh, aa, bb, tt, gg1, hh1;

   conv(cc, c);
   conv(gg, g);
   conv(hh, h);
   conv(aa, a);
   conv(bb, b);

   ZZ_pXModulus GG;
   ZZ_pXModulus HH;

   build(GG, gg);
   build(HH, hh);

   ZZ_pXMultiplier AA;
   ZZ_pXMultiplier BB;

   build(AA, aa, HH);
   build(BB, bb, GG);

   rem(gg1, cc, GG);
   MulMod(gg1, gg1, BB, GG);
   
   rem(hh1, cc, HH);
   MulMod(hh1, hh1, AA, HH);

   conv(g1, gg1);
   mul(g1, g1, p);
   add(G, g, g1);

   conv(h1, hh1);
   mul(h1, h1, p);
   add(H, h, h1);

   /* lift inverses */

   ZZX t1, t2, r;

   mul(t1, a, G);
   mul(t2, b, H);
   add(t1, t1, t2);
   add(t1, t1, -1);
   negate(t1, t1);

   if (!divide(r, t1, p))
      Error("inexact division");

   ZZ_pX rr, aa1, bb1;

   conv(rr, r);
   
   rem(aa1, rr, HH);
   MulMod(aa1, aa1, AA, HH);
   rem(bb1, rr, GG);
   MulMod(bb1, bb1, BB, GG);

   ZZX a1, b1;

   conv(a1, aa1);
   mul(a1, a1, p);
   add(A, a, a1);

   conv(b1, bb1);
   mul(b1, b1, p);
   add(B, b, b1);

   Gout = G;
   Hout = H;
   Aout = A;
   Bout = B;
}

static
void HenselLift1(ZZX& Gout, ZZX& Hout, 
                const ZZX& f, const ZZX& g, const ZZX& h,
                const ZZX& a, const ZZX& b, const ZZ& p) 
{
   ZZX c, g1, h1, G, H;

   mul(c, g, h);
   sub(c, f, c);

   if (!divide(c, c, p))
      Error("inexact division");

   ZZ_pX cc, gg, hh, aa, bb, tt, gg1, hh1;

   conv(cc, c);
   conv(gg, g);
   conv(hh, h);
   conv(aa, a);
   conv(bb, b);

   ZZ_pXModulus GG;
   ZZ_pXModulus HH;

   build(GG, gg);
   build(HH, hh);

   rem(gg1, cc, GG);
   MulMod(gg1, gg1, bb, GG);
   
   rem(hh1, cc, HH);
   MulMod(hh1, hh1, aa, HH);

   conv(g1, gg1);
   mul(g1, g1, p);
   add(G, g, g1);

   conv(h1, hh1);
   mul(h1, h1, p);
   add(H, h, h1);

   Gout = G;
   Hout = H;
}

static
void BuildTree(vec_long& link, vec_ZZX& v, vec_ZZX& w,
               const vec_zz_pX& a)
{
   long k = a.length();

   if (k < 2) Error("bad arguments to BuildTree");

   vec_zz_pX V, W;

   V.SetLength(2*k-2);
   W.SetLength(2*k-2);
   link.SetLength(2*k-2);

   long i, j, s;
   long minp, mind;

   for (i = 0; i < k; i++) {
      V[i] = a[i];
      link[i] = -(i+1);
   }

   for (j = 0; j < 2*k-4; j += 2) {
      minp = j;
      mind = deg(V[j]);

      for (s = j+1; s < i; s++)
         if (deg(V[s]) < mind) {
            minp = s;
            mind = deg(V[s]);
         }

      swap(V[j], V[minp]);
      swap(link[j], link[minp]);

      minp = j+1;
      mind = deg(V[j+1]);

      for (s = j+2; s < i; s++)
         if (deg(V[s]) < mind) {
            minp = s;
            mind = deg(V[s]);
         }

      swap(V[j+1], V[minp]);
      swap(link[j+1], link[minp]);

      mul(V[i], V[j], V[j+1]);
      link[i] = j;
      i++;
   }

   zz_pX d;

   for (j = 0; j < 2*k-2; j += 2) {
      XGCD(d, W[j], W[j+1], V[j], V[j+1]);
      if (!IsOne(d))
         Error("relatively prime polynomials expected");
   }

   v.SetLength(2*k-2);
   for (j = 0; j < 2*k-2; j++)
      conv(v[j], V[j]);

   w.SetLength(2*k-2);
   for (j = 0; j < 2*k-2; j++)
      conv(w[j], W[j]);
}

static
void RecTreeLift(const vec_long& link, vec_ZZX& v, vec_ZZX& w,
                 const ZZ& p, const ZZX& f, long j, long inv)
{
   if (j < 0) return;

   if (inv)
      HenselLift(v[j], v[j+1], w[j], w[j+1],
                 f, v[j], v[j+1], w[j], w[j+1], p);
   else
      HenselLift1(v[j], v[j+1], f, v[j], v[j+1], w[j], w[j+1], p);

   RecTreeLift(link, v, w, p, v[j], link[j], inv);
   RecTreeLift(link, v, w, p, v[j+1], link[j+1], inv);
}

static
void TreeLift(const vec_long& link, vec_ZZX& v, vec_ZZX& w, 
              long e0, long e1, const ZZX& f, long inv)

// lift from p^{e0} to p^{e1}

{
   ZZ p0, p1;

   power(p0, zz_p::modulus(), e0);
   power(p1, zz_p::modulus(), e1-e0);

   ZZ_pBak bak;
   bak.save();
   ZZ_p::init(p1);

   RecTreeLift(link, v, w, p0, f, v.length()-2, inv);

   bak.restore();
} 

void MultiLift(vec_ZZX& A, const vec_zz_pX& a, const ZZX& f, long e,
               long verbose)

{
   long k = a.length();
   long i;

   if (k < 2 || e < 1 || NTL_OVERFLOW(e, 1, 0)) Error("MultiLift: bad args");

   if (!IsOne(LeadCoeff(f)))
      Error("MultiLift: bad args");

   for (i = 0; i < a.length(); i++)
      if (!IsOne(LeadCoeff(a[i])))
         Error("MultiLift: bad args");

   if (e == 1) {
      A.SetLength(k);
      for (i = 0; i < k; i++)
         conv(A[i], a[i]);
      return;
   }

   vec_long E;
   append(E, e);
   while (e > 1) {
      e = (e+1)/2;
      append(E, e);
   }
   long l = E.length();

   vec_ZZX v, w;
   vec_long link;

   double t;

   if (verbose) {
      cerr << "building tree...";
      t = GetTime();
   }

   BuildTree(link, v, w, a);

   if (verbose) cerr << (GetTime()-t) << "\n";


   for (i = l-1; i > 0; i--) {
      if (verbose) {
         cerr << "lifting to " << E[i-1] << "...";
         t = GetTime();
      }
      
      TreeLift(link, v, w, E[i], E[i-1], f, i != 1);

      if (verbose) cerr << (GetTime()-t) << "\n";
   }

   A.SetLength(k);
   for (i = 0; i < 2*k-2; i++) {
      long t = link[i];
      if (t < 0)
         A[-(t+1)] = v[i];
   }
}

static
void inplace_rev(ZZX& f)
{
   long n = deg(f);
   long i, j;

   i = 0;
   j = n;
   while (i < j) {
      swap(f.rep[i], f.rep[j]);
      i++;
      j--;
   }

   f.normalize();
}

long ZZXFac_InitNumPrimes = 7;
long ZZXFac_MaxNumPrimes = 50;

static 
void RecordPattern(vec_long& pat, vec_pair_zz_pX_long& fac)
{
   long n = pat.length()-1;
   long i;

   for (i = 0; i <= n; i++)
      pat[i] = 0;

   long k = fac.length();

   for (i = 0; i < k; i++) {
      long d = fac[i].b;
      long m = deg(fac[i].a)/d;

      pat[d] = m;
   }
}

static
long NumFactors(const vec_long& pat)
{
   long n = pat.length()-1;

   long i;
   long res = 0;

   for (i = 0; i <= n; i++) 
      res += pat[i];

   return res;
}

static
void CalcPossibleDegrees(ZZ& pd, const vec_long& pat)
{
   long n = pat.length()-1;
   set(pd);

   long d, j;
   ZZ t1;

   for (d = 1; d <= n; d++) 
      for (j = 0; j < pat[d]; j++) {
         LeftShift(t1, pd, d);
         bit_or(pd, pd, t1);
      }
}

static 
void CalcPossibleDegrees(vec_ZZ& S, const vec_ZZ_pX& fac, long k)

// S[i] = possible degrees of the product of any subset of size k
//        among fac[i...], encoded as a bit vector.      

{
   long r = fac.length();

   S.SetLength(r);

   if (r == 0)
      return;

   if (k < 1 || k > r)
      Error("CalcPossibleDegrees: bad args");

   long i, l;
   ZZ old, t1;

   set(S[r-1]);
   LeftShift(S[r-1], S[r-1], deg(fac[r-1]));

   for (i = r-2; i >= 0; i--) {
      set(t1);
      LeftShift(t1, t1, deg(fac[i]));
      bit_or(S[i], t1, S[i+1]);
   }

   for (l = 2; l <= k; l++) {
      old = S[r-l];
      LeftShift(S[r-l], S[r-l+1], deg(fac[r-l]));

      for (i = r-l-1; i >= 0; i--) {
         LeftShift(t1, old, deg(fac[i]));
         old = S[i];
         bit_or(S[i], S[i+1], t1);
      }
   }
}



static
vec_zz_pX *
SmallPrimeFactorization(LocalInfoT& LocalInfo, const ZZX& f,
                            long verbose)

{
   long n = deg(f);
   long i;
   double t;

   LocalInfo.n = n;
   long& NumPrimes = LocalInfo.NumPrimes;
   NumPrimes = 0;

   LocalInfo.NumFactors = 0;

   // some sanity checking...

   if (ZZXFac_InitNumPrimes < 1 || ZZXFac_InitNumPrimes > 10000)
      Error("bad ZZXFac_InitNumPrimes");

   if (ZZXFac_MaxNumPrimes < ZZXFac_InitNumPrimes || ZZXFac_MaxNumPrimes > 10000)
      Error("bad ZZXFac_MaxNumPrimes");

   LocalInfo.p.SetLength(ZZXFac_InitNumPrimes);
   LocalInfo.pattern.SetLength(ZZXFac_InitNumPrimes);
  
   // set bits 0..n of LocalInfo.PossibleDegrees 
   SetBit(LocalInfo.PossibleDegrees, n+1);
   add(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, -1);

   long minr = n+1;
   long irred = 0;

   vec_pair_zz_pX_long *bestfac = 0;
   zz_pX *besth = 0;
   vec_zz_pX *spfactors = 0;
   zz_pContext bestp;
   long bestp_index;

   long maxroot = NextPowerOfTwo(deg(f))+1;

   for (; NumPrimes < ZZXFac_InitNumPrimes;) {
      long p = LocalInfo.s.next();
      if (!p) Error("out of small primes");
      if (divide(LeadCoeff(f), p)) {
         if (verbose) cerr << "skipping " << p << "\n";
         continue;
      }
      zz_p::init(p, maxroot);

      zz_pX ff, ffp, d;

      conv(ff, f);
      MakeMonic(ff);
      diff(ffp, ff);

      GCD(d, ffp, ff);
      if (!IsOne(d)) {
         if (verbose)  cerr << "skipping " << p << "\n";
         continue;
      }


      if (verbose) {
         cerr << "factoring mod " << p << "...";
         t = GetTime();
      }

      vec_pair_zz_pX_long thisfac;
      zz_pX thish; 

      SFCanZass1(thisfac, thish, ff, 0);

      LocalInfo.p[NumPrimes] = p;

      vec_long& pattern = LocalInfo.pattern[NumPrimes];
      pattern.SetLength(n+1);

      RecordPattern(pattern, thisfac);
      long r = NumFactors(pattern);
      
      if (verbose) {
         cerr << (GetTime()-t) << "\n";
         cerr << "degree sequence: ";
         for (i = 0; i <= n; i++)
            if (pattern[i]) {
               cerr << pattern[i] << "*" << i << " ";
            }
         cerr << "\n";
      }

      if (r == 1) {
         irred = 1;
         break;
      }

      // update admissibility info

      ZZ pd;

      CalcPossibleDegrees(pd, pattern);
      bit_and(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, pd);

      if (weight(LocalInfo.PossibleDegrees) == 2) {
         irred = 1;
         break;
      }


      if (r < minr) {
         minr = r;
         delete bestfac;
         bestfac = NTL_NEW_OP vec_pair_zz_pX_long;
         *bestfac = thisfac;
         delete besth;
         besth = NTL_NEW_OP zz_pX;
         *besth = thish;
         bestp.save();
         bestp_index = NumPrimes;
      }

      NumPrimes++;
   }

   if (!irred) {
      // delete best prime from LocalInfo
      swap(LocalInfo.pattern[bestp_index], LocalInfo.pattern[NumPrimes-1]);
      LocalInfo.p[bestp_index] = LocalInfo.p[NumPrimes-1];
      NumPrimes--;

      bestp.restore();

      spfactors = NTL_NEW_OP vec_zz_pX;

      if (verbose) {
         cerr << "p = " << zz_p::modulus() << ", completing factorization...";
         t = GetTime();
      }
      SFCanZass2(*spfactors, *bestfac, *besth, 0);
      if (verbose) {
         cerr << (GetTime()-t) << "\n";
      }
   }

   delete bestfac;
   delete besth;

   return spfactors;
}


static
long ConstTermTest(const vec_ZZ_pX& W, 
                  const vec_long& I,
                  const ZZ& ct,
                  const ZZ_p& lc,
                  vec_ZZ_p& prod,
                  long& ProdLen) 
{
   long k = I.length();
   ZZ_p t;
   ZZ t1, t2;
   long i;

   if (ProdLen == 0) {
      mul(prod[0], lc, ConstTerm(W[I[0]]));
      ProdLen++;
   }

   for (i = ProdLen; i < k; i++)
      mul(prod[i], prod[i-1], ConstTerm(W[I[i]]));

   ProdLen = k-1;

   // should make this a routine in ZZ_p
   t1 = rep(prod[k-1]);
   RightShift(t2, ZZ_p::modulus(), 1);
   if (t1 > t2)
      sub(t1, t1, ZZ_p::modulus());

   return divide(ct, t1);
}

static
void BalCopy(ZZX& g, const ZZ_pX& G)
{
   const ZZ& p = ZZ_p::modulus();
   ZZ p2, t;
   RightShift(p2, p, 1);

   long n = G.rep.length();
   long i;

   g.rep.SetLength(n);
   for (i = 0; i < n; i++) {
      t = rep(G.rep[i]);
      if (t > p2) sub(t, t, p);
      g.rep[i] = t;
   }
}




static
void mul(ZZ_pX& g, const vec_ZZ_pX& W, const vec_long& I)
{
   vec_ZZ_pX w;
   long k = I.length();
   w.SetLength(k);
   long i;

   for (i = 0; i < k; i++)
      w[i] = W[I[i]];

   mul(g, w);
}




static
void InvMul(ZZ_pX& g, const vec_ZZ_pX& W, const vec_long& I)
{
   vec_ZZ_pX w;
   long k = I.length();
   long r = W.length();
   w.SetLength(r-k);
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         w[j-i] = W[j];
   } 

   mul(g, w);
}




static
void RemoveFactors(vec_ZZ_pX& W, const vec_long& I)
{
   long k = I.length();
   long r = W.length();
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         swap(W[j-i], W[j]); 
   }

   W.SetLength(r-k);
}

static
void unpack(vec_long& x, const ZZ& a, long n)
{
   x.SetLength(n+1);
   long i;

   for (i = 0; i <= n; i++)
      x[i] = bit(a, i);
}

static
void SubPattern(vec_long& p1, const vec_long& p2)
{
   long l = p1.length();

   if (p2.length() != l)
      Error("SubPattern: bad args");

   long i;

   for (i = 0; i < l; i++) {
      p1[i] -= p2[i];
      if (p1[i] < 0)
         Error("SubPattern: internal error");
   }
}

static
void UpdateLocalInfo(LocalInfoT& LocalInfo, vec_ZZ& pdeg,
                     const vec_ZZ_pX& W, const vec_ZZX& factors,
                     const ZZX& f, long k, long verbose)
{
   static long cnt = 0;

   if (verbose) {
      cnt = (cnt + 1) % 100;
      if (!cnt) cerr << "#";
   }

   double t;
   long i, j;

   if (LocalInfo.NumFactors < factors.length()) {
      zz_pBak bak;
      bak.save();

      vec_long pattern;
      pattern.SetLength(LocalInfo.n+1);

      ZZ pd;

      if (verbose) {
         cerr << "updating local info...";
         t = GetTime();
      }

      for (i = 0; i < LocalInfo.NumPrimes; i++) {
         zz_p::init(LocalInfo.p[i], NextPowerOfTwo(LocalInfo.n)+1);

         for (j = LocalInfo.NumFactors; j < factors.length(); j++) {
            vec_pair_zz_pX_long thisfac;
            zz_pX thish; 

            zz_pX ff;
            conv(ff, factors[j]);
            MakeMonic(ff);

            SFCanZass1(thisfac, thish, ff, 0);
            RecordPattern(pattern, thisfac);
            SubPattern(LocalInfo.pattern[i], pattern);
         }

         CalcPossibleDegrees(pd, LocalInfo.pattern[i]);
         bit_and(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, pd);

      }

      bak.restore();
      LocalInfo.NumFactors = factors.length();

      CalcPossibleDegrees(pdeg, W, k);

      if (verbose) cerr << (GetTime()-t) << "\n";
   }

   if (!ZZXFac_van_Hoeij && LocalInfo.NumPrimes + 1 < ZZXFac_MaxNumPrimes) {
      if (verbose)
         cerr << "adding a prime\n";

      zz_pBak bak;
      bak.save();

      for (;;) {
         long p = LocalInfo.s.next();
         if (!p)
            Error("UpdateLocalInfo: out of primes");

         if (divide(LeadCoeff(f), p)) {
            if (verbose) cerr << "skipping " << p << "\n";
            continue;
         }

         zz_p::init(p, NextPowerOfTwo(deg(f))+1);

         zz_pX ff, ffp, d;
   
         conv(ff, f);
         MakeMonic(ff);
         diff(ffp, ff);
   
         GCD(d, ffp, ff);
         if (!IsOne(d)) {
            if (verbose)  cerr << "skipping " << p << "\n";
            continue;
         }

         vec_pair_zz_pX_long thisfac;
         zz_pX thish;

         if (verbose) {
            cerr << "factoring mod " << p << "...";
            t = GetTime();
         }

         SFCanZass1(thisfac, thish, ff, 0);

         LocalInfo.p.SetLength(LocalInfo.NumPrimes+1);
         LocalInfo.pattern.SetLength(LocalInfo.NumPrimes+1);

         LocalInfo.p[LocalInfo.NumPrimes] = p;
         vec_long& pattern = LocalInfo.pattern[LocalInfo.NumPrimes];

         pattern.SetLength(LocalInfo.n+1);
         RecordPattern(pattern, thisfac);

         if (verbose) {
            cerr << (GetTime()-t) << "\n";
            cerr << "degree sequence: ";
            for (i = 0; i <= LocalInfo.n; i++)
               if (pattern[i]) {
                  cerr << pattern[i] << "*" << i << " ";
               }
            cerr << "\n";
         }

         ZZ pd;
         CalcPossibleDegrees(pd, pattern);
         bit_and(LocalInfo.PossibleDegrees, LocalInfo.PossibleDegrees, pd);

         LocalInfo.NumPrimes++;

         break;
      }

      bak.restore();
   }
}



const int ZZX_OVERLIFT = NTL_BITS_PER_LONG;
  // number of bits by which we "overlift"....this enables, in particular,
  // the "n-1" test.  
  // Must lie in the range 4..NTL_BITS_PER_LONG.


#define EXTRA_BITS (1)
// Any small number, like 1, 2 or 3, should be OK.


static
void CardinalitySearch(vec_ZZX& factors, ZZX& f, 
                       vec_ZZ_pX& W, 
                       LocalInfoT& LocalInfo, 
                       long k,
                       long bnd,
                       long verbose)
{
   double start_time, end_time;

   if (verbose) {
      start_time = GetTime();
      cerr << "\n************ ";
      cerr << "start cardinality " << k << "\n";
   }

   vec_long I, D;
   I.SetLength(k);
   D.SetLength(k);

   long r = W.length();

   vec_ZZ_p prod;
   prod.SetLength(k);
   long ProdLen;

   vec_ZZ pdeg;
   CalcPossibleDegrees(pdeg, W, k);

   ZZ pd;
   vec_long upd;

   long i, state;

   long cnt = 0;

   ZZ ct;
   mul(ct, ConstTerm(f), LeadCoeff(f));

   ZZ_p lc;
   conv(lc, LeadCoeff(f));

   ZZ_pX gg;
   ZZX g, h;

   I[0] = 0;  

   while (I[0] <= r-k) {
      bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees);

      if (IsZero(pd)) {
         if (verbose) cerr << "skipping\n";
         goto done;
      }

      unpack(upd, pd, LocalInfo.n);

      D[0] = deg(W[I[0]]);
      i = 1;
      state = 0;
      ProdLen = 0;

      for (;;) {
         if (i < ProdLen)
            ProdLen = i;

         if (i == k) {
            // process indices I[0], ..., I[k-1]

            if (cnt > 2000000) { 
               cnt = 0;
               UpdateLocalInfo(LocalInfo, pdeg, W, factors, f, k, verbose);
               bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees);
               if (IsZero(pd)) {
                  if (verbose) cerr << "skipping\n";
                  goto done;
               }
               unpack(upd, pd, LocalInfo.n);
            }

            state = 1;  // default continuation state


            if (!upd[D[k-1]]) {
               i--;
               cnt++;
               continue;
            }

            if (!ConstTermTest(W, I, ct, lc, prod, ProdLen)) {
               i--;
               cnt += 100;
               continue;
            }

            if (verbose) {
               cerr << "+";
            }

            cnt += 1000;

            if (2*D[k-1] <= deg(f)) {
               mul(gg, W, I);
               mul(gg, gg, lc);
               BalCopy(g, gg);
               if(MaxBits(g) > bnd) {
                  i--;
                  continue;
               }
               if (verbose) {
                  cerr << "*";
               }
               PrimitivePart(g, g);
               if (!divide(h, f, g)) {
                  i--;
                  continue;
               }
               
               // factor found!
               append(factors, g);
               if (verbose) {
                 cerr << "degree " << deg(g) << " factor found\n";
               }
               f = h;
               mul(ct, ConstTerm(f), LeadCoeff(f));
               conv(lc, LeadCoeff(f));
            }
            else {
               InvMul(gg, W, I);
               mul(gg, gg, lc);
               BalCopy(g, gg);
               if(MaxBits(g) > bnd) {
                  i--;
                  continue;
               }
               if (verbose) {
                  cerr << "*";
               }
               PrimitivePart(g, g);
               if (!divide(h, f, g)) {
                  i--;
                  continue;
               }

               // factor found!
               append(factors, h);
               if (verbose) {
                 cerr << "degree " << deg(h) << " factor found\n";
               }
               f = g;
               mul(ct, ConstTerm(f), LeadCoeff(f));
               conv(lc, LeadCoeff(f));
            }

            RemoveFactors(W, I);
            r = W.length();
            cnt = 0;

            if (2*k > r) 
               goto done;
            else 
               break;
         }
         else if (state == 0) {
            I[i] = I[i-1] + 1;
            D[i] = D[i-1] + deg(W[I[i]]);
            i++;
         }
         else { // state == 1
            I[i]++;
            if (i == 0) break;

            if (I[i] > r-k+i)
               i--;
            else {
               D[i] = D[i-1] + deg(W[I[i]]);
               i++;
               state = 0;
            }
         }
      }
   }


   done: 


   if (verbose) {
      end_time = GetTime();
      cerr << "\n************ ";
      cerr << "end cardinality " << k << "\n";
      cerr << "time: " << (end_time-start_time) << "\n";
   }
}



typedef unsigned long TBL_T;

#if (NTL_BITS_PER_LONG >= 64)

// for 64-bit machines

#define TBL_MSK (63)
#define TBL_SHAMT (6)

#else

// for 32-bit machines

#define TBL_MSK (31)
#define TBL_SHAMT (5)

#endif


#if 0

// recursive version

static
void RecInitTab(TBL_T ***lookup_tab, long i, const vec_ulong& ratio, 
             long r, long k, unsigned long thresh1, long **shamt_tab,
             unsigned long sum, long card, long j)
{
   if (j >= i || card >= k-1) {
      if (card > 1) {
         long shamt = shamt_tab[i][card];
         unsigned long index1 = ((-sum) >> shamt);
         lookup_tab[i][card][index1 >> TBL_SHAMT] |= (1UL << (index1 & TBL_MSK));
         unsigned long index2 = ((-sum+thresh1) >> shamt);
         if (index1 != index2)
            lookup_tab[i][card][index2 >> TBL_SHAMT] |= (1UL << (index2 & TBL_MSK));

      }

      return;
   }


   RecInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab, sum, card, j+1);
   RecInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab, 
              sum+ratio[r-1-j], card+1, j+1);
}


static
void DoInitTab(TBL_T ***lookup_tab, long i, const vec_ulong& ratio, 
               long r, long k, unsigned long thresh1, long **shamt_tab)
{
   RecInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab, 0, 0, 0);
}

#else

// iterative version


static
void DoInitTab(TBL_T ***lookup_tab, long i, const vec_ulong& ratio,
               long r, long k, unsigned long thresh1, long **shamt_tab)
{
   vec_long sum_vec, card_vec, location_vec;
   sum_vec.SetLength(i+1);
   card_vec.SetLength(i+1);
   location_vec.SetLength(i+1);

   long j = 0;
   sum_vec[0] = 0;
   card_vec[0] = 0;

   unsigned long sum;
   long  card, location;

   location = 0;

   while (j >= 0) {
      sum = sum_vec[j];
      card = card_vec[j];

      switch (location) {

      case 0:

         if (j >= i || card >= k-1) {
            if (card > 1) {
               long shamt = shamt_tab[i][card];
               unsigned long index1 = ((-sum) >> shamt);
               lookup_tab[i][card][index1 >> TBL_SHAMT] |= (1UL << (index1 & TBL_MSK));
               unsigned long index2 = ((-sum+thresh1) >> shamt);
               if (index1 != index2)
                  lookup_tab[i][card][index2 >> TBL_SHAMT] |= (1UL << (index2 & TBL_MSK));
      
            }
      
            location = location_vec[j];
            j--;
            continue;
         }


         sum_vec[j+1] = sum;
         card_vec[j+1] = card;
         location_vec[j+1] = 1;
         j++;
         location = 0;
         continue;

      case 1:

         sum_vec[j+1] = sum+ratio[r-1-j];
         card_vec[j+1] = card+1;
         location_vec[j+1] = 2;
         j++;
         location = 0;
         continue;  

      case 2:

         location = location_vec[j];
         j--;
         continue;
      }
   }
}
         
#endif
   
   

static
void InitTab(TBL_T ***lookup_tab, const vec_ulong& ratio, long r, long k,
             unsigned long thresh1, long **shamt_tab, long pruning)
{
   long i, j, t;

   if (pruning) {
      for (i = 2; i <= pruning; i++) {
         long len = min(k-1, i);
         for (j = 2; j <= len; j++) {
            long ub = (((1L << (NTL_BITS_PER_LONG-shamt_tab[i][j])) 
                      + TBL_MSK) >> TBL_SHAMT); 
            for (t = 0; t < ub; t++)
               lookup_tab[i][j][t] = 0;
         }
   
         DoInitTab(lookup_tab, i, ratio, r, k, thresh1, shamt_tab);
      }
   }
}


static
void RatioInit1(vec_ulong& ratio, const vec_ZZ_pX& W, const ZZ_p& lc,
                long pruning, TBL_T ***lookup_tab, 
                vec_vec_ulong& pair_ratio, long k, unsigned long thresh1, 
                long **shamt_tab)
{
   long r = W.length();
   long i, j;

   ZZ_p a;

   ZZ p;
   p = ZZ_p::modulus();

   ZZ aa;

   for (i = 0; i < r; i++) {
      long m = deg(W[i]);
      mul(a, W[i].rep[m-1], lc);
      LeftShift(aa, rep(a), NTL_BITS_PER_LONG);
      div(aa, aa, p);
      ratio[i] = to_ulong(aa);
   }

   InitTab(lookup_tab, ratio, r, k, thresh1, shamt_tab, pruning);

   for (i = 0; i < r; i++)
      for (j = 0; j < i; j++) {
         mul(a, W[i].rep[deg(W[i])-1], W[j].rep[deg(W[j])-1]);
         mul(a, a, lc);
         LeftShift(aa, rep(a), NTL_BITS_PER_LONG);
         div(aa, aa, p);
         pair_ratio[i][j] = to_ulong(aa);
      }

   for (i = 0; i < r; i++) {
      long m = deg(W[i]);
      if (m >= 2) {
         mul(a, W[i].rep[m-2], lc);
         LeftShift(aa, rep(a), NTL_BITS_PER_LONG);
         div(aa, aa, p);
         pair_ratio[i][i] = to_ulong(aa);
      }
      else
         pair_ratio[i][i] = 0;
   }
}

static 
long SecondOrderTest(const vec_long& I_vec, const vec_vec_ulong& pair_ratio_vec,
                     vec_ulong& sum_stack_vec, long& SumLen)
{
   long k = I_vec.length();
   const long *I = I_vec.elts();
   unsigned long *sum_stack = sum_stack_vec.elts();

   unsigned long sum, thresh1;

   if (SumLen == 0) {
      unsigned long epsilon = (1UL << (NTL_BITS_PER_LONG-ZZX_OVERLIFT));
      unsigned long delta = (unsigned long) ((k*(k+1)) >> 1);
      unsigned long thresh = epsilon + delta;
      thresh1 = (epsilon << 1) + delta;

      sum = thresh;
      sum_stack[k] = thresh1;
   }
   else {
      sum = sum_stack[SumLen-1];
      thresh1 = sum_stack[k];
   }

   long i, j;

   for (i = SumLen; i < k; i++) {
      const unsigned long *p = pair_ratio_vec[I[i]].elts();
      for (j = 0; j <= i; j++) {
         sum += p[I[j]];
      }

      sum_stack[i] = sum;
   }

   SumLen = k-1;

   return (sum <= thresh1);
}


static
ZZ choose_fn(long r, long k)
{
   ZZ a, b;

   a = 1; 
   b = 1;

   long i;
   for (i = 0; i < k; i++) {
      a *= r-i;
      b *= k-i;
   }

   return a/b;
}

static
void PrintInfo(const char *s, const ZZ& a, const ZZ& b)
{
   cerr << s << a << " / " << b << " = ";
   
   double x = to_double(a)/to_double(b);

   if (x == 0) 
      cerr << "0"; 
   else {
      int n;
      double f;

      f = frexp(x, &n);
      cerr << f << "*2^" << n;
   }

   cerr << "\n";
}

static
void RemoveFactors1(vec_long& W, const vec_long& I, long r)
{
   long k = I.length();
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         swap(W[j-i], W[j]); 
   }
}

static
void RemoveFactors1(vec_vec_long& W, const vec_long& I, long r)
{
   long k = I.length();
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         swap(W[j-i], W[j]); 
   }

   for (i = 0; i < r-k; i++)
      RemoveFactors1(W[i], I, r);
}


// should this swap go in tools.h?
// Maybe not...I don't want to pollute the interface too much more.

static inline 
void swap(unsigned long& a, unsigned long& b)  
   { unsigned long t;  t = a; a = b; b = t; }

static
void RemoveFactors1(vec_ulong& W, const vec_long& I, long r)
{
   long k = I.length();
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         swap(W[j-i], W[j]); 
   }
}

static
void RemoveFactors1(vec_vec_ulong& W, const vec_long& I, long r)
{
   long k = I.length();
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         swap(W[j-i], W[j]); 
   }

   for (i = 0; i < r-k; i++)
      RemoveFactors1(W[i], I, r);
}


static
void RemoveFactors1(vec_ZZ_p& W, const vec_long& I, long r)
{
   long k = I.length();
   long i, j;

   i = 0;
   for (j = 0; j < r; j++) {
      if (i < k && j == I[i])
         i++;
      else
         swap(W[j-i], W[j]);
   }
}

static
void SumCoeffs(ZZ& sum, const ZZX& a)
{
   ZZ res;
   res = 0;
   long i;
   long n = a.rep.length();
   for (i = 0; i < n; i++)
      res += a.rep[i];

   sum = res;
}

static
void SumCoeffs(ZZ_p& sum, const ZZ_pX& a)
{
   ZZ_p res;
   res = 0;
   long i;
   long n = a.rep.length();
   for (i = 0; i < n; i++)
      res += a.rep[i];

   sum = res;
}


static
long ConstTermTest(const vec_ZZ_p& W, 
                  const vec_long& I,
                  const ZZ& ct,
                  const ZZ_p& lc,
                  vec_ZZ_p& prod,
                  long& ProdLen) 
{
   long k = I.length();
   ZZ_p t;
   ZZ t1, t2;
   long i;

   if (ProdLen == 0) {
      mul(prod[0], lc, W[I[0]]);
      ProdLen++;
   }

   for (i = ProdLen; i < k; i++)
      mul(prod[i], prod[i-1], W[I[i]]);

   ProdLen = k-1;

   // should make this a routine in ZZ_p
   t1 = rep(prod[k-1]);
   RightShift(t2, ZZ_p::modulus(), 1);
   if (t1 > t2)
      sub(t1, t1, ZZ_p::modulus());

   return divide(ct, t1);
}


long ZZXFac_MaxPrune = 10;



static
long pruning_bnd(long r, long k)
{
   double x = 0; 

   long i;
   for (i = 0; i < k; i++) {
      x += log(double(r-i)/double(k-i));
   }

   return long((x/log(2.0)) * 0.75);
}

static
long shamt_tab_init(long pos, long card, long pruning, long thresh1_len)
{
   double x = 1;
   long i;

   for (i = 0; i < card; i++) {
      x *= double(pos-i)/double(card-i);
   }

   x *= pruning;  // this can be adjusted to control the density
   if (pos <= 6) x *= 2;  // a little boost that costs very little
      

   long t = long(ceil(log(x)/log(2.0)));

   t = max(t, TBL_SHAMT); 

   t = min(t, NTL_BITS_PER_LONG-thresh1_len);


   return NTL_BITS_PER_LONG-t;
}

// The following routine should only be called for k > 1,
// and is only worth calling for k > 2.


static
void CardinalitySearch1(vec_ZZX& factors, ZZX& f, 
                       vec_ZZ_pX& W, 
                       LocalInfoT& LocalInfo, 
                       long k,
                       long bnd,
                       long verbose)
{
   double start_time, end_time;

   if (verbose) {
      start_time = GetTime();
      cerr << "\n************ ";
      cerr << "start cardinality " << k << "\n";
   }

   if (k <= 1) Error("internal error: call CardinalitySearch");

   // This test is needed to ensure correcntes of "n-2" test
   if (NumBits(k) > NTL_BITS_PER_LONG/2-2)
      Error("Cardinality Search: k too large...");

   vec_ZZ pdeg;
   CalcPossibleDegrees(pdeg, W, k);
   ZZ pd;

   bit_and(pd, pdeg[0], LocalInfo.PossibleDegrees);
   if (pd == 0) {
      if (verbose) cerr << "skipping\n";
      return;
   }

   vec_long I, D;
   I.SetLength(k);
   D.SetLength(k);

   long r = W.length();

   long initial_r = r;

   vec_ulong ratio, ratio_sum;
   ratio.SetLength(r);
   ratio_sum.SetLength(k);

   unsigned long epsilon = (1UL << (NTL_BITS_PER_LONG-ZZX_OVERLIFT));
   unsigned long delta = (unsigned long) k;
   unsigned long thresh = epsilon + delta;
   unsigned long thresh1 = (epsilon << 1) + delta;

   long thresh1_len = NumBits(long(thresh1)); 

   long pruning;

   pruning = min(r/2, ZZXFac_MaxPrune);
   pruning = min(pruning, pruning_bnd(r, k));
   pruning = min(pruning, NTL_BITS_PER_LONG-EXTRA_BITS-thresh1_len);

   if (pruning <= 4) pruning = 0;

   long init_pruning = pruning;

   TBL_T ***lookup_tab = 0;

   long **shamt_tab = 0;

   if (pruning) {
      typedef long *long_p;

      long i, j;

      shamt_tab = NTL_NEW_OP long_p[pruning+1];
      if (!shamt_tab) Error("out of mem");
      shamt_tab[0] = shamt_tab[1] = 0;

      for (i = 2; i <= pruning; i++) {
         long len = min(k-1, i);
         shamt_tab[i] = NTL_NEW_OP long[len+1];
         if (!shamt_tab[i]) Error("out of mem");
         shamt_tab[i][0] = shamt_tab[i][1] = 0;

         for (j = 2; j <= len; j++)
            shamt_tab[i][j] = shamt_tab_init(i, j, pruning, thresh1_len);
      }

      typedef  TBL_T *TBL_T_p;
      typedef  TBL_T **TBL_T_pp;

      lookup_tab = NTL_NEW_OP TBL_T_pp[pruning+1];
      if (!lookup_tab) Error("out of mem");

      lookup_tab[0] = lookup_tab[1] = 0;

      for (i = 2; i <= pruning; i++) {
         long len = min(k-1, i);
         lookup_tab[i] = NTL_NEW_OP TBL_T_p[len+1];
         if (!lookup_tab[i]) Error("out of mem");

         lookup_tab[i][0] = lookup_tab[i][1] = 0;

         for (j = 2; j <= len; j++) {
            lookup_tab[i][j] = NTL_NEW_OP TBL_T[((1L << (NTL_BITS_PER_LONG-shamt_tab[i][j]))+TBL_MSK) >> TBL_SHAMT];
            if (!lookup_tab[i][j]) Error("out of mem");
         }
      }
   }

   if (verbose) {
      cerr << "pruning = " << pruning << "\n";
   }

   vec_ZZ_p prod;
   prod.SetLength(k);
   long ProdLen;

   vec_ZZ_p prod1;
   prod1.SetLength(k);
   long ProdLen1;

   vec_ulong sum_stack;
   sum_stack.SetLength(k+1);
   long SumLen;

   vec_long upd;

   long i, state;

   long cnt = 0;

   ZZ ct;
   mul(ct, ConstTerm(f), LeadCoeff(f));

   ZZ_p lc;
   conv(lc, LeadCoeff(f));

   vec_vec_ulong pair_ratio;
   pair_ratio.SetLength(r);
   for (i = 0; i < r; i++)
      pair_ratio[i].SetLength(r);

   RatioInit1(ratio, W, lc, pruning, lookup_tab, pair_ratio, k, thresh1, shamt_tab);

   ZZ c1;
   SumCoeffs(c1, f);
   mul(c1, c1, LeadCoeff(f));

   vec_ZZ_p sum_coeffs;
   sum_coeffs.SetLength(r);
   for (i = 0; i < r; i++)
      SumCoeffs(sum_coeffs[i], W[i]);

   vec_long degv;
   degv.SetLength(r);

   for (i = 0; i < r; i++)
      degv[i] = deg(W[i]);

   ZZ_pX gg;
   ZZX g, h;

   I[0] = 0;  

   long loop_cnt = 0, degree_cnt = 0, n2_cnt = 0, sl_cnt = 0, ct_cnt = 0, 
        pl_cnt = 0, c1_cnt = 0, pl1_cnt = 0, td_cnt = 0;

   ZZ loop_total, degree_total, n2_total, sl_total, ct_total, 
      pl_total, c1_total, pl1_total, td_total;

   while (I[0] <= r-k) {
      bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees);

      if (IsZero(pd)) {
         if (verbose) cerr << "skipping\n";
         goto done;
      }

      unpack(upd, pd, LocalInfo.n);

      D[0] = degv[I[0]];
      ratio_sum[0] = ratio[I[0]] + thresh;
      i = 1;
      state = 0;
      ProdLen = 0;
      ProdLen1 = 0;
      SumLen = 0;

      for (;;) {
         cnt++;

         if (cnt > 2000000) { 
            if (verbose) {
               loop_total += loop_cnt;  loop_cnt = 0;
               degree_total += degree_cnt;  degree_cnt = 0;
               n2_total += n2_cnt;  n2_cnt = 0;
               sl_total += sl_cnt;  sl_cnt = 0;
               ct_total += ct_cnt;  ct_cnt = 0;
               pl_total += pl_cnt;  pl_cnt = 0;
               c1_total += c1_cnt;  c1_cnt = 0;
               pl1_total += pl1_cnt;  pl1_cnt = 0;
               td_total += td_cnt;  td_cnt = 0;
            }

            cnt = 0;
            UpdateLocalInfo(LocalInfo, pdeg, W, factors, f, k, verbose);
            bit_and(pd, pdeg[I[0]], LocalInfo.PossibleDegrees);
            if (IsZero(pd)) {
               if (verbose) cerr << "skipping\n";
               goto done;
            }
            unpack(upd, pd, LocalInfo.n);
         }

         if (i == k-1) {

            unsigned long ratio_sum_last = ratio_sum[k-2];
            long I_last = I[k-2];


            {
               long D_last = D[k-2];
   
               unsigned long rs;
               long I_this;
               long D_this;
   
               for (I_this = I_last+1; I_this < r; I_this++) {
                  loop_cnt++;
   
                  rs = ratio_sum_last + ratio[I_this];
                  if (rs > thresh1) {
                     cnt++;
                     continue;
                  }

                  degree_cnt++;
   
                  D_this = D_last + degv[I_this];
   
                  if (!upd[D_this]) {
                     cnt++;
                     continue;
                  }
   
                  n2_cnt++;
                  sl_cnt += (k-SumLen);

                  I[k-1] = I_this;

                  if (!SecondOrderTest(I, pair_ratio, sum_stack, SumLen)) {
                     cnt += 2;
                     continue;
                  }

                  c1_cnt++;
                  pl1_cnt += (k-ProdLen1);

                  if (!ConstTermTest(sum_coeffs, I, c1, lc, prod1, ProdLen1)) {
                     cnt += 100;
                     continue;
                  }

                  ct_cnt++;
                  pl_cnt += (k-ProdLen);

                  D[k-1] = D_this;

                  if (!ConstTermTest(W, I, ct, lc, prod, ProdLen)) {
                     cnt += 100;
                     continue;
                  }

                  td_cnt++;
   
                  if (verbose) {
                     cerr << "+";
                  }
   
                  cnt += 1000;
   
                  if (2*D[k-1] <= deg(f)) {
                     mul(gg, W, I);
                     mul(gg, gg, lc);
                     BalCopy(g, gg);
                     if(MaxBits(g) > bnd) {
                        continue;
                     }
                     if (verbose) {
                        cerr << "*";
                     }
                     PrimitivePart(g, g);
                     if (!divide(h, f, g)) {
                        continue;
                     }
                  
                     // factor found!
                     append(factors, g);
                     if (verbose) {
                       cerr << "degree " << deg(g) << " factor found\n";
                     }
                     f = h;
                     mul(ct, ConstTerm(f), LeadCoeff(f));
                     conv(lc, LeadCoeff(f));
                  }
                  else {
                     InvMul(gg, W, I);
                     mul(gg, gg, lc);
                     BalCopy(g, gg);
                     if(MaxBits(g) > bnd) {
                        continue;
                     }
                     if (verbose) {
                        cerr << "*";
                     }
                     PrimitivePart(g, g);
                     if (!divide(h, f, g)) {
                        continue;
                     }
      
                     // factor found!
                     append(factors, h);
                     if (verbose) {
                       cerr << "degree " << deg(h) << " factor found\n";
                     }
                     f = g;
                     mul(ct, ConstTerm(f), LeadCoeff(f));
                     conv(lc, LeadCoeff(f));
                  }
      
                  RemoveFactors(W, I);
                  RemoveFactors1(degv, I, r);
                  RemoveFactors1(sum_coeffs, I, r);
                  RemoveFactors1(ratio, I, r);
                  RemoveFactors1(pair_ratio, I, r);

                  r = W.length();
                  cnt = 0;

                  pruning = min(pruning, r/2);
                  if (pruning <= 4) pruning = 0;

                  InitTab(lookup_tab, ratio, r, k, thresh1, shamt_tab, pruning);

                  if (2*k > r) 
                     goto done;
                  else 
                     goto restart;
               } /* end of inner for loop */ 

            }

            i--;
            state = 1;  
         }
         else {
            if (state == 0) {
               long I_i = I[i-1] + 1;
               I[i] = I_i;

               long pruned;

               if (pruning && r-I_i <= pruning) {
                  long pos = r-I_i;
                  unsigned long rs = ratio_sum[i-1];
                  unsigned long index1 = (rs >> shamt_tab[pos][k-i]);
                  if (lookup_tab[pos][k-i][index1 >> TBL_SHAMT] & (1UL << (index1&TBL_MSK)))
                     pruned = 0;
                  else
                     pruned = 1;
               }
               else
                  pruned = 0; 

               if (pruned) {
                  i--;
                  state = 1;
               }
               else {
                  D[i] = D[i-1] + degv[I_i];
                  ratio_sum[i] = ratio_sum[i-1] + ratio[I_i];
                  i++;
               }
            }
            else { // state == 1
      
               loop_cnt++;
      
               if (i < ProdLen)
                  ProdLen = i;
      
               if (i < ProdLen1)
                  ProdLen1 = i;
      
               if (i < SumLen)
                  SumLen = i;

               long I_i = (++I[i]);

               if (i == 0) break;
   
               if (I_i > r-k+i) {
                  i--;
               }
               else {

                  long pruned;

                  if (pruning && r-I_i <= pruning) {
                     long pos = r-I_i;
                     unsigned long rs = ratio_sum[i-1];
                     unsigned long index1 = (rs >> shamt_tab[pos][k-i]);
                     if (lookup_tab[pos][k-i][index1 >> TBL_SHAMT] & (1UL << (index1&TBL_MSK)))
                        pruned = 0;
                     else
                        pruned = 1;
                  }
                  else
                     pruned = 0; 
   

                  if (pruned) {
                     i--;
                  }
                  else {
                     D[i] = D[i-1] + degv[I_i];
                     ratio_sum[i] = ratio_sum[i-1] + ratio[I_i];
                     i++;
                     state = 0;
                  }
               }
            }
         }
      }

      restart: ;
   }

   done:

   if (lookup_tab) {
      long i, j;
      for (i = 2; i <= init_pruning; i++) {
         long len = min(k-1, i);
         for (j = 2; j <= len; j++) {
            delete [] lookup_tab[i][j];
         }

         delete [] lookup_tab[i];
      }

      delete [] lookup_tab;
   }

   if (shamt_tab) {
      long i;
      for (i = 2; i <= init_pruning; i++) {
         delete [] shamt_tab[i];
      }

      delete [] shamt_tab;
   }

   if (verbose) { 
      end_time = GetTime();
      cerr << "\n************ ";
      cerr << "end cardinality " << k << "\n";
      cerr << "time: " << (end_time-start_time) << "\n";
      ZZ loops_max = choose_fn(initial_r+1, k);
      ZZ tuples_max = choose_fn(initial_r, k);

      loop_total += loop_cnt;
      degree_total += degree_cnt;
      n2_total += n2_cnt;
      sl_total += sl_cnt;
      ct_total += ct_cnt;
      pl_total += pl_cnt;
      c1_total += c1_cnt;
      pl1_total += pl1_cnt;
      td_total += td_cnt;

      cerr << "\n";
      PrintInfo("loops: ", loop_total, loops_max);
      PrintInfo("degree tests: ", degree_total, tuples_max);

      PrintInfo("n-2 tests: ", n2_total, tuples_max);

      cerr << "ave sum len: ";
      if (n2_total == 0) 
         cerr << "--";
      else
         cerr << (to_double(sl_total)/to_double(n2_total));
      cerr << "\n";

      PrintInfo("f(1) tests: ", c1_total, tuples_max);

      cerr << "ave prod len: ";
      if (c1_total == 0) 
         cerr << "--";
      else
         cerr << (to_double(pl1_total)/to_double(c1_total));
      cerr << "\n";

      PrintInfo("f(0) tests: ", ct_total, tuples_max);

      cerr << "ave prod len: ";
      if (ct_total == 0) 
         cerr << "--";
      else
         cerr << (to_double(pl_total)/to_double(ct_total));
      cerr << "\n";

      PrintInfo("trial divs: ", td_total, tuples_max);
   }
}



static
void FindTrueFactors(vec_ZZX& factors, const ZZX& ff, 
                     const vec_ZZX& w, const ZZ& P, 
                     LocalInfoT& LocalInfo,
                     long verbose,
                     long bnd)
{
   ZZ_pBak bak;
   bak.save();
   ZZ_p::init(P);

   long r = w.length();

   vec_ZZ_pX W;
   W.SetLength(r);

   long i;
   for (i = 0; i < r; i++)
      conv(W[i], w[i]);


   ZZX f;

   f = ff;

   long k;

   k = 1;
   factors.SetLength(0);
   while (2*k <= W.length()) {
      if (k <= 1)
         CardinalitySearch(factors, f, W, LocalInfo, k, bnd, verbose);
      else
         CardinalitySearch1(factors, f, W, LocalInfo, k, bnd, verbose);
      k++;
   }

   append(factors, f);

   bak.restore();
}





/**********************************************************************\

                        van Hoeij's algorithm 

\**********************************************************************/



const long van_hoeij_size_thresh = 12; 
// Use van Hoeij's algorithm if number of modular factors exceeds this bound.
// Must be >= 1.

const long van_hoeij_card_thresh = 3;
// Switch to knapsack method if cardinality of candidate factors
// exceeds this bound.
// Must be >= 1.




// This routine assumes that the input f is a non-zero polynomial
// of degree n, and returns the value f(a).

static 
ZZ PolyEval(const ZZX& f, const ZZ& a)
{
   if (f == 0) Error("PolyEval: internal error");

   long n = deg(f);

   ZZ acc, t1, t2;
   long i;

   acc = f.rep[n];

   for (i = n-1; i >= 0; i--) {
      mul(t1, acc, a);
      add(acc, t1, f.rep[i]);
   }

   return acc;
}


// This routine assumes that the input f is a polynomial with non-zero constant
// term, of degree n, and with leading coefficient c; it returns 
// an upper bound on the absolute value of the roots of the
// monic, integer polynomial g(X) =  c^{n-1} f(X/c).

static 
ZZ RootBound(const ZZX& f)
{
   if (ConstTerm(f) == 0) Error("RootBound: internal error");

   long n = deg(f);

   ZZX g;
   long i;

   g = f;

   if (g.rep[n] < 0) negate(g.rep[n], g.rep[n]);
   for (i = 0; i < n; i++) {
      if (g.rep[i] > 0) negate(g.rep[i], g.rep[i]);
   }

   ZZ lb, ub, mb;


   lb = 0;

   ub = 1;
   while (PolyEval(g, ub) < 0) {
      ub = 2*ub;
   }

   // lb < root <= ub

   while (ub - lb > 1) {
      ZZ mb = (ub + lb)/2;

      if (PolyEval(g, mb) < 0) 
         lb = mb;
      else 
         ub = mb;
   }

   return ub*g.rep[n];
}


// This routine takes as input an n x m integer matrix M, where the rows of M 
// are assumed to be linearly independent.
// It is also required that both n and m are non-zero.
// It computes an integer d, along with an n x m matrix R, such that
// R*d^{-1} is the reduced row echelon form of M.
// The routine is probabilistic: the output is always correct, but the
// routine may abort the program with negligible probability
// (specifically, if GenPrime returns a composite, and the modular
// gauss routine can't invert a non-zero element).

static
void gauss(ZZ& d_out, mat_ZZ& R_out, const mat_ZZ& M)
{
   long n = M.NumRows();
   long m = M.NumCols();

   if (n == 0 || m == 0) Error("gauss: internal error");

   zz_pBak bak;
   bak.save();

   for (;;) {
      long p = GenPrime_long(NTL_SP_NBITS);
      zz_p::init(p);

      mat_zz_p MM;
      conv(MM, M);

      long r = gauss(MM);
      if (r < n) continue;

      // compute pos(1..n), so that pos(i) is the index 
      // of the i-th pivot column

      vec_long pos;
      pos.SetLength(n);

      long i, j;
      for (i = j = 1; i <= n; i++) {
         while (MM(i, j) == 0) j++;
         pos(i) = j;
         j++;
      } 

      // compute the n x n sub-matrix consisting of the
      // pivot columns of M

      mat_ZZ S;
      S.SetDims(n, n);

      for (i = 1; i <= n; i++)
         for (j = 1; j <= n; j++)
            S(i, j) = M(i, pos(j));

      mat_ZZ S_inv;
      ZZ d;

      inv(d, S_inv, S);
      if (d == 0) continue;

      mat_ZZ R;
      mul(R, S_inv, M);

      // now check that R is of the right form, which it will be
      // if we were not unlucky

      long OK = 1;

      for (i = 1; i <= n && OK; i++) {
         for (j = 1; j < pos(i) && OK; j++)
            if (R(i, j) != 0) OK = 0;

         if (R(i, pos(i)) != d) OK = 0;

         for (j = 1; j < i && OK; j++)
            if (R(j, pos(i)) != 0) OK = 0;
      }

      if (!OK) continue;

      d_out = d;
      R_out = R;
      break;
   }
}


// The input polynomial f should be monic, and deg(f) > 0.
// The input P should be > 1.
// Tr.length() >= d, and Tr(i), for i = 1..d-1, should be the
// Tr_i(f) mod P (in van Hoeij's notation).
// The quantity Tr_d(f) mod P is computed, and stored in Tr(d).


void ComputeTrace(vec_ZZ& Tr, const ZZX& f, long d, const ZZ& P)
{
   long n = deg(f);

   // check arguments

   if (n <= 0 || LeadCoeff(f) != 1) 
      Error("ComputeTrace: internal error (1)");

   if (d <= 0)
      Error("ComputeTrace: internal error (2)");

   if (Tr.length() < d)
      Error("ComputeTrace: internal error (3)");

   if (P <= 1)
      Error("ComputeTrace: internal error (4)");

   // treat d > deg(f) separately

   if (d > n) {
      ZZ t1, t2;
      long i;

      t1 = 0;

      for (i = 1; i <= n; i++) {
         mul(t2, Tr(i + d - n - 1), f.rep[i-1]); 
         add(t1, t1, t2);
      }

      rem(t1, t1, P);
      NegateMod(t1, t1, P);
      Tr(d) = t1;
   }
   else {
      ZZ t1, t2;
      long i;

      mul(t1, f.rep[n-d], d);

      for (i = 1; i < d; i++) {
         mul(t2, Tr(i), f.rep[n-d+i]);
         add(t1, t1, t2);
      }

      rem(t1, t1, P);
      NegateMod(t1, t1, P);
      Tr(d) = t1;
   }
}

// Tr(1..d) are traces as computed above.
// C and pb have length at least d.
// For i = 1..d, pb(i) = p^{a_i} for a_i > 0.
// pdelta = p^delta for delta > 0.
// P = p^a for some a >= max{ a_i : i=1..d }.

// This routine computes C(1..d), where 
// C(i) = C_{a_i}^{a_i + delta}( Tr(i)*lc^i ) for i = 1..d.


void ChopTraces(vec_ZZ& C, const vec_ZZ& Tr, long d,
                const vec_ZZ& pb, const ZZ& pdelta, const ZZ& P, const ZZ& lc)
{
   if (d <= 0) Error("ChopTraces: internal error (1)");
   if (C.length() < d) Error("ChopTraces: internal error (2)");
   if (Tr.length() < d) Error("ChopTraces: internal error (3)");
   if (pb.length() < d) Error("ChopTraces: internal error (4)");
   if (P <= 1) Error("ChopTraces: internal error (5)");

   ZZ lcpow, lcred;
   lcpow = 1;
   rem(lcred, lc, P);

   ZZ pdelta_2;
   RightShift(pdelta_2, pdelta, 1);

   ZZ t1, t2;

   long i;
   for (i = 1; i <= d; i++) {
      MulMod(lcpow, lcpow, lcred, P);
      MulMod(t1, lcpow, Tr(i), P);

      RightShift(t2, pb(i), 1);
      add(t1, t1, t2);
      div(t1, t1, pb(i));
      rem(t1, t1, pdelta);
      if (t1 > pdelta_2)
         sub(t1, t1, pdelta);

      C(i) = t1;
   }
}


// Similar to above, but computes a linear combination of traces.


static
void DenseChopTraces(vec_ZZ& C, const vec_ZZ& Tr, long d, long d1, 
                     const ZZ& pb_eff, const ZZ& pdelta, const ZZ& P, 
                     const ZZ& lc, const mat_ZZ& A)
{

   ZZ pdelta_2;
   RightShift(pdelta_2, pdelta, 1);

   ZZ pb_eff_2;
   RightShift(pb_eff_2, pb_eff, 1);

   ZZ acc, t1, t2;

   long i, j;

   ZZ lcpow, lcred;
   rem(lcred, lc, P);

   for (i = 1; i <= d1; i++) {
      lcpow = 1;
      acc = 0;

      for (j = 1; j <= d; j++) {
         MulMod(lcpow, lcpow, lcred, P);
         MulMod(t1, lcpow, Tr(j), P);
         rem(t2, A(i, j), P);
         MulMod(t1, t1, t2, P);
         AddMod(acc, acc, t1, P);
      }

      t1 = acc;
      add(t1, t1, pb_eff_2);
      div(t1, t1, pb_eff);
      rem(t1, t1, pdelta);
      if (t1 > pdelta_2)
         sub(t1, t1, pdelta);

      C(i) = t1;
   }
}


static
void Compute_pb(vec_long& b,vec_ZZ& pb, long p, long d, 
                const ZZ& root_bound, long n)
{
   ZZ t1, t2;
   long i;

   t1 = 2*power(root_bound, d)*n;

   if (d == 1) {
      i = 0;
      t2 = 1;
   }
   else {
      i = b(d-1);
      t2 = pb(d-1);
   }

   while (t2 <= t1) {
      i++;
      t2 *= p;
   }

   b.SetLength(d);
   b(d) = i;

   pb.SetLength(d);
   pb(d) = t2;
}

static
void Compute_pdelta(long& delta, ZZ& pdelta, long p, long bit_delta)
{
   ZZ t1;
   long i;

   i = delta;
   t1 = pdelta;

   while (NumBits(t1) <= bit_delta) {
      i++;
      t1 *= p;
   }

   delta = i;
   pdelta = t1;
}

static
void BuildReductionMatrix(mat_ZZ& M, long& C, long r, long d, const ZZ& pdelta,
                          const vec_vec_ZZ& chop_vec, 
                          const mat_ZZ& B_L, long verbose)
{
   long s = B_L.NumRows();

   C = long( sqrt(double(d) * double(r)) / 2.0 ) + 1;

   M.SetDims(s+d, r+d);
   clear(M);


   long i, j, k;
   ZZ t1, t2;

   for (i = 1; i <= s; i++)
      for (j = 1; j <= r; j++)
         mul(M(i, j), B_L(i, j), C);

   ZZ pdelta_2;

   RightShift(pdelta_2, pdelta, 1);

   long maxbits = 0;

   for (i = 1; i <= s; i++)
      for (j = 1; j <= d; j++) {
         t1 = 0;
         for (k = 1; k <= r; k++) {
            mul(t2, B_L(i, k), chop_vec(k)(j));
            add(t1, t1, t2);
         }

         rem(t1, t1, pdelta);
         if (t1 > pdelta_2)
            sub(t1, t1, pdelta);

         maxbits = max(maxbits, NumBits(t1));

         M(i, j+r) = t1;
      }
  

   for (i = 1; i <= d; i++)
      M(i+s, i+r) = pdelta;

   if (verbose) 
      cerr << "ratio = " << double(maxbits)/double(NumBits(pdelta))
           << "; ";
}


static
void CutAway(mat_ZZ& B1, vec_ZZ& D, mat_ZZ& M, 
             long C, long r, long d)
{
   long k = M.NumRows();
   ZZ bnd = 4*to_ZZ(C)*to_ZZ(C)*to_ZZ(r) + to_ZZ(d)*to_ZZ(r)*to_ZZ(r);

   while (k >= 1 && 4*D[k] > bnd*D[k-1]) k--;

   mat_ZZ B2;

   B2.SetDims(k, r);
   long i, j;

   for (i = 1; i <= k; i++)
      for (j = 1; j <= r; j++)
         div(B2(i, j), M(i, j), C);

   M.kill(); // save space
   D.kill();

   ZZ det2;
   long rnk;

   rnk = image(det2, B2);

   B1.SetDims(rnk, r);
   for (i = 1; i <= rnk; i++)
      for (j = 1; j <= r; j++)
         B1(i, j) = B2(i + k - rnk, j);
}




static
long GotThem(vec_ZZX& factors, 
             const mat_ZZ& B_L,
             const vec_ZZ_pX& W, 
             const ZZX& f, 
             long bnd,
             long verbose)
{
   double tt0, tt1;
   ZZ det;
   mat_ZZ R;
   long s, r;
   long i, j, cnt;

   if (verbose) {
      cerr << "   checking A (s = " << B_L.NumRows() 
           << "): gauss...";
   }

   tt0 = GetTime();

   gauss(det, R, B_L);

   tt1 = GetTime();

   if (verbose) cerr << (tt1-tt0) << "; ";

   // check if condition A holds

   s = B_L.NumRows();
   r = B_L.NumCols();

   for (j = 0; j < r; j++) {
      cnt = 0;
      for (i = 0; i < s; i++) {
         if (R[i][j] == 0) continue;
         if (R[i][j] != det) {
            if (verbose) cerr << "failed.\n";
            return 0;
         }
         cnt++;
      }

      if (cnt != 1) {
         if (verbose) cerr << "failed.\n";
         return 0;
      }
   }

   if (verbose) {
      cerr << "passed.\n";
      cerr << "   checking B...";
   }

   // extract relevant information from R

   vec_vec_long I_vec;
   I_vec.SetLength(s);

   vec_long deg_vec;
   deg_vec.SetLength(s);

   for (i = 0; i < s; i++) {
      long dg = 0;

      for (j = 0; j < r; j++) {
         if (R[i][j] != 0) append(I_vec[i], j);
         dg += deg(W[j]);
      }

      deg_vec[i] = dg;
   }

   R.kill(); // save space


   // check if any candidate factor is the product of too few
   // modular factors

   for (i = 0; i < s; i++)
      if (I_vec[i].length() <= van_hoeij_card_thresh) {
         if (verbose) cerr << "X\n";
         return 0;
      }

   if (verbose) cerr << "1";


   // sort deg_vec, I_vec in order of increasing degree

   for (i = 0; i < s-1; i++)
      for (j = 0; j < s-1-i; j++)
         if (deg_vec[j] > deg_vec[j+1]) {
            swap(deg_vec[j], deg_vec[j+1]);
            swap(I_vec[j], I_vec[j+1]);
         }


   // perform constant term tests

   ZZ ct;
   mul(ct, LeadCoeff(f), ConstTerm(f));

   ZZ half_P;
   RightShift(half_P, ZZ_p::modulus(), 1);

   ZZ_p lc, prod;
   conv(lc, LeadCoeff(f));

   ZZ t1;

   for (i = 0; i < s; i++) {
      vec_long& I = I_vec[i];
      prod = lc;
      for (j = 0; j < I.length(); j++)
         mul(prod, prod, ConstTerm(W[I[j]]));

      t1 = rep(prod);
      if (t1 > half_P)
         sub(t1, t1, ZZ_p::modulus());

      if (!divide(ct, t1)) {
          if (verbose) cerr << "X\n";
          return 0;
      }
   }

   if (verbose) cerr << "2";


   // multiply out polynomials and perform size tests

   vec_ZZX fac;
   ZZ_pX gg;
   ZZX g;

   for (i = 0; i < s-1; i++) {
      vec_long& I = I_vec[i];
      mul(gg, W, I);
      mul(gg, gg, lc);
      BalCopy(g, gg);
      if (MaxBits(g) > bnd) {
         if (verbose) cerr << "X\n";
         return 0;
      }
      PrimitivePart(g, g);
      append(fac, g);
   }

   if (verbose) cerr << "3";


   // finally...trial division

   ZZX f1 = f;
   ZZX h;

   for (i = 0; i < s-1; i++) {
      if (!divide(h, f1, fac[i])) {
         cerr << "X\n";
         return 0;
      }

      f1 = h;
   }

   // got them!

   if (verbose) cerr << "$\n";

   append(factors, fac);
   append(factors, f1);

   return 1;
}


void AdditionalLifting(ZZ& P1, 
                       long& e1, 
                       vec_ZZX& w1, 
                       long p, 
                       long new_bound,
                       const ZZX& f, 
                       long doubling,
                       long verbose)
{
   long new_e1;

   if (doubling)
      new_e1 = max(2*e1, new_bound); // at least double e1
   else
      new_e1 = new_bound;

   if (verbose) {
      cerr << ">>> additional hensel lifting to " << new_e1 << "...\n";
   }

   ZZ new_P1;

   power(new_P1, p, new_e1);

   ZZX f1;
   ZZ t1, t2;
   long i;
   long n = deg(f);

   if (LeadCoeff(f) == 1)
      f1 = f;
   else if (LeadCoeff(f) == -1)
      negate(f1, f);
   else {
      rem(t1, LeadCoeff(f), new_P1);
      InvMod(t1, t1, new_P1);
      f1.rep.SetLength(n+1); 
      for (i = 0; i <= n; i++) {
         mul(t2, f.rep[i], t1);
         rem(f1.rep[i], t2, new_P1);
      }
   }

   zz_pBak bak;
   bak.save();

   zz_p::init(p, NextPowerOfTwo(n)+1);

   long r = w1.length();

   vec_zz_pX ww1;
   ww1.SetLength(r);
   for (i = 0; i < r; i++)
      conv(ww1[i], w1[i]);

   w1.kill();

   double tt0, tt1;

   tt0 = GetTime();

   MultiLift(w1, ww1, f1, new_e1, verbose);

   tt1 = GetTime();

   if (verbose) {
      cerr << "lifting time: " << (tt1-tt0) << "\n\n";
   }

   P1 = new_P1;
   e1 = new_e1;

   bak.restore();
}

static
void Compute_pb_eff(long& b_eff, ZZ& pb_eff, long p, long d, 
                    const ZZ& root_bound,  
                    long n, long ran_bits)
{
   ZZ t1, t2;
   long i;

   if (root_bound == 1)
      t1 = (to_ZZ(d)*to_ZZ(n)) << (ran_bits + 1);
   else
      t1 = (power(root_bound, d)*n) << (ran_bits + 2);

   i = 0;
   t2 = 1;

   while (t2 <= t1) {
      i++;
      t2 *= p;
   }

   b_eff = i;
   pb_eff = t2;
}



static
long d1_val(long bit_delta, long r, long s)
{
   return long( 0.30*double(r)*double(s)/double(bit_delta) ) + 1;
}




// Next comes van Hoeij's algorithm itself.
// Some notation that differs from van Hoeij's paper:
//   n = deg(f)
//   r = # modular factors
//   s = dim(B_L)  (gets smaller over time)
//   d = # traces used
//   d1 = number of "compressed" traces
//
// The algorithm starts with a "sparse" version of van Hoeij, so that
// at first the traces d = 1, 2, ... are used in conjunction with
// a d x d identity matrix for van Hoeij's matrix A.
// The number of "excess" bits used for each trace, bit_delta, is initially
// 2*r.
// 
// When d*bit_delta exceeds 0.25*r*s, we switch to 
// a "dense" mode, where we use only about 0.25*r*s "compressed" traces.
// These bounds follow from van Hoeij's heuristic estimates.
//
// In sparse mode, d and bit_delta increase exponentially (but gently).
// In dense mode, but d increases somewhat more aggressively,
// and bit_delta is increased more gently.


static
void FindTrueFactors_vH(vec_ZZX& factors, const ZZX& ff, 
                        const vec_ZZX& w, const ZZ& P, 
                        long p, long e,
                        LocalInfoT& LocalInfo,
                        long verbose,
                        long bnd)
{
   const long SkipSparse = 0;

   ZZ_pBak bak;
   bak.save();
   ZZ_p::init(P);

   long r = w.length();

   vec_ZZ_pX W;
   W.SetLength(r);

   long i, j;

   for (i = 0; i < r; i++)
      conv(W[i], w[i]);


   ZZX f;

   f = ff;

   long k;

   k = 1;
   factors.SetLength(0);
   while (2*k <= W.length() && 
      (k <= van_hoeij_card_thresh || W.length() <= van_hoeij_size_thresh)) {

      if (k <= 1)
         CardinalitySearch(factors, f, W, LocalInfo, k, bnd, verbose);
      else
         CardinalitySearch1(factors, f, W, LocalInfo, k, bnd, verbose);
      k++;
   }

   if (2*k > W.length()) {
      // rest is irreducible, so we're done

      append(factors, f);
   }
   else {

      // now we apply van Hoeij's algorithm proper to f
   
      double time_start, time_stop, lll_time, tt0, tt1;

      time_start = GetTime();
      lll_time = 0;
   
      if (verbose) {
         cerr << "\n\n*** starting knapsack procedure\n";
      }
   
      ZZ P1 = P;
      long e1 = e;    // invariant: P1 = p^{e1}
   
      r = W.length();
   
      vec_ZZX w1;
      w1.SetLength(r);
      for (i = 0; i < r; i++)
         conv(w1[i], W[i]);
   
      long n = deg(f);
   
      mat_ZZ B_L;            // van Hoeij's lattice
      ident(B_L, r);
   
      long d = 0;            // number of traces
      
      long bit_delta = 0;    // number of "excess" bits

      vec_long b;
      vec_ZZ pb;             // pb(i) = p^{b(i)}

      long delta = 0;
      ZZ pdelta = to_ZZ(1);  // pdelta = p^delta
      pdelta = 1;
   
      vec_vec_ZZ trace_vec;
      trace_vec.SetLength(r);
   
      vec_vec_ZZ chop_vec;
      chop_vec.SetLength(r);
   
      ZZ root_bound = RootBound(f);
   
      if (verbose) {
         cerr << "NumBits(root_bound) = " << NumBits(root_bound) << "\n";
      }

      long dense = 0;
      long ran_bits = 32;

      long loop_cnt = 0;


      long s = r;

      for (;;) {

         loop_cnt++;
   
         // if we are using the power hack, then we do not try too hard...
         // this is really a hack on a hack!

         if (ok_to_abandon && 
             ((d >= 2 && s > 128) || (d >= 3 && s > 32) || (d >= 4 && s > 8) ||
              d >= 5) ) {
            if (verbose) cerr << "   abandoning\n";
            append(factors, f);
            break;
         }

         long d_last, d_inc, d_index;

         d_last = d;

         // set d_inc: 

         if (!dense) {
            d_inc = 1 + d/8;
         }
         else {
            d_inc = 1 + d/4; 
         }

         d_inc = min(d_inc, n-1-d);
            
         d += d_inc;

         // set bit_delta:
   
         if (bit_delta == 0) {
            // set initial value...don't make it any smaller than 2*r

            bit_delta = 2*r; 
         }
         else {
            long extra_bits;

            if (!dense) {
               extra_bits = 1 + bit_delta/8;
            }
            else if (d_inc != 0) {
               if (d1_val(bit_delta, r, s) > 1)
                  extra_bits = 1 + bit_delta/16; 
               else
                  extra_bits = 0;
            }
            else
               extra_bits = 1 + bit_delta/8;

            bit_delta += extra_bits;
         }

         if (d > d1_val(bit_delta, r, s)) 
            dense = 1;
   
         Compute_pdelta(delta, pdelta, p, bit_delta);

         long d1;
         long b_eff;
         ZZ pb_eff;

         if (!dense) {
            for (d_index = d_last + 1; d_index <= d; d_index++)
               Compute_pb(b, pb, p, d_index, root_bound, n);

            d1 = d;
            b_eff = b(d);
            pb_eff = pb(d);
         }
         else {
            d1 = d1_val(bit_delta, r, s);
            Compute_pb_eff(b_eff, pb_eff, p, d, root_bound, n, ran_bits); 
         }

         if (verbose) {
            cerr << "*** d = " << d 
                 << "; s = " << s 
                 << "; delta = " << delta 
                 << "; b_eff = " << b_eff;

            if (dense) cerr << "; dense [" << d1 << "]";
            cerr << "\n";
         }
   
         if (b_eff + delta > e1) {
            long doubling;

            doubling = 1;

            AdditionalLifting(P1, e1, w1, p, b_eff + delta, f, 
                              doubling, verbose);

            if (verbose) {
               cerr << ">>> recomputing traces...";
            }

            tt0 = GetTime();

            trace_vec.kill();
            trace_vec.SetLength(r);

            for (i = 0; i < r; i++) {
               trace_vec[i].SetLength(d_last);

               for (d_index = 1; d_index <= d_last; d_index++) {
                  ComputeTrace(trace_vec[i], w1[i], d_index, P1);
               }
            }

            tt1 = GetTime();
            if (verbose) cerr << (tt1-tt0) << "\n";
         }
   
         if (verbose) cerr << "   trace..."; 
   
         tt0 = GetTime();

         mat_ZZ A;

         if (dense) {
            A.SetDims(d1, d);
            for (i = 1; i <= d1; i++)
               for (j = 1; j <= d; j++) {
                  RandomBits(A(i, j), ran_bits);
                  if (RandomBnd(2)) negate(A(i, j), A(i, j));
               }
         }
      
   
         for (i = 0; i < r; i++) {
            trace_vec[i].SetLength(d);
            for (d_index = d_last + 1; d_index <= d; d_index++)
               ComputeTrace(trace_vec[i], w1[i], d_index, P1);
   
            chop_vec[i].SetLength(d1);

            if (!dense)
               ChopTraces(chop_vec[i], trace_vec[i], d, pb, pdelta, 
                          P1, LeadCoeff(f));
            else
               DenseChopTraces(chop_vec[i], trace_vec[i], d, d1, pb_eff, 
                               pdelta, P1, LeadCoeff(f), A);
         }

         A.kill();
   
         tt1 = GetTime();
   
         if (verbose) cerr << (tt1-tt0) << "\n";
   
         mat_ZZ M;
         long C;
   
         if (verbose) cerr << "   building matrix...";
   
         tt0 = GetTime();
   
         BuildReductionMatrix(M, C, r, d1, pdelta, chop_vec, B_L, verbose);
   
         tt1 = GetTime();
   
         if (verbose) cerr << (tt1-tt0) << "\n";

         if (SkipSparse) {   
            if (!dense) {
               if (verbose) cerr << "skipping LLL\n";
               continue;
            }
         }

         if (verbose) cerr << "   LLL...";
   
         tt0 = GetTime();
   
         vec_ZZ D;
         long rnk = LLL_plus(D, M);
   
         tt1 = GetTime();

         lll_time += (tt1-tt0);
   
         if (verbose) cerr << (tt1-tt0) << "\n";
   
         if (rnk != s + d1) {
            Error("van Hoeij -- bad rank");
         }
   
         mat_ZZ B1;
   
         if (verbose) cerr << "   CutAway...";
   
         tt0 = GetTime();
   
         CutAway(B1, D, M, C, r, d1);
   
         tt1 = GetTime();
   
         if (verbose) cerr << (tt1-tt0) << "\n";
   
         if (B1.NumRows() >= s) continue;
         // no progress...try again

         // otherwise, update B_L and test if we are done
   
         swap(B1, B_L);
         B1.kill();
         s = B_L.NumRows();
   
         if (s == 0)
            Error("oops! s == 0 should not happen!");
   
         if (s == 1) {
            if (verbose) cerr << "   irreducible!\n";
            append(factors, f);
            break;
         }
   
         if (s > r / (van_hoeij_card_thresh + 1)) continue;
         // dimension too high...we can't be done
   
         if (GotThem(factors, B_L, W, f, bnd, verbose)) break;
      }

      time_stop = GetTime();

      if (verbose) {
         cerr << "*** knapsack finished: total time = " 
              << (time_stop - time_start) << "; LLL time = "
              << lll_time << "\n";
      }
   }

   bak.restore();
}


static
void ll_SFFactor(vec_ZZX& factors, const ZZX& ff, 
                 long verbose,
                 long bnd)

// input is primitive and square-free, with positive leading
// coefficient
{
   if (deg(ff) <= 1) {
      factors.SetLength(1);
      factors[0] = ff;
      if (verbose) {
         cerr << "*** SFFactor, trivial case 1.\n";
      }
      return;
   }

   // remove a factor of X, if necessary

   ZZX f;
   long xfac;
   long rev;

   double t;

   if (IsZero(ConstTerm(ff))) {
      RightShift(f, ff, 1);
      xfac = 1;
   }
   else {
      f = ff;
      xfac = 0;
   }

   // return a factor of X-1 if necessary

   long x1fac = 0;

   ZZ c1;
   SumCoeffs(c1, f);

   if (c1 == 0) {
      x1fac = 1;
      div(f, f, ZZX(1,1) - 1);
   }

   SumCoeffs(c1, f);

   if (deg(f) <= 1) {
      long r = 0;
      factors.SetLength(0);
      if (deg(f) > 0) {
         factors.SetLength(r+1);
         factors[r] = f;
         r++;
      }
      if (xfac) {
         factors.SetLength(r+1);
         SetX(factors[r]);
         r++;
      }

      if (x1fac) {
         factors.SetLength(r+1);
         factors[r] = ZZX(1,1) - 1;
         r++;
      }

      if (verbose) {
         cerr << "*** SFFactor: trivial case 2.\n";
      }

      return;
   }

   if (verbose) {
      cerr << "*** start SFFactor.\n";
   }

   // reverse f if this makes lead coefficient smaller

   ZZ t1, t2;

   abs(t1, LeadCoeff(f));
   abs(t2, ConstTerm(f));

   if (t1 > t2) {
      inplace_rev(f);
      rev = 1;
   }
   else 
      rev = 0;

   // obtain factorization modulo small primes

   if (verbose) {
      cerr << "factorization modulo small primes...\n";
      t = GetTime();
   }

   LocalInfoT LocalInfo;

   zz_pBak bak;
   bak.save();

   vec_zz_pX *spfactors =
       SmallPrimeFactorization(LocalInfo, f, verbose);

   if (!spfactors) {
      // f was found to be irreducible 

      bak.restore();

      if (verbose) {
         t = GetTime()-t;
         cerr << "small prime time: " << t << ", irreducible.\n";
      }

      if (rev)
         inplace_rev(f);

      long r = 0;

      factors.SetLength(r+1);
      factors[r] = f;
      r++;

      if (xfac) {
         factors.SetLength(r+1);
         SetX(factors[r]);
         r++;
      }

      if (x1fac) {
         factors.SetLength(r+1);
         factors[r] = ZZX(1,1) - 1;
         r++;
      }

      return;
   }

   if (verbose) {
      t = GetTime()-t;
      cerr << "small prime time: ";
      cerr << t << ", number of factors = " << spfactors->length() << "\n";
   }

   // prepare for Hensel lifting

   // first, calculate bit bound 

   long bnd1;
   long n = deg(f);
   long i;
   long e;
   ZZ P;
   long p;
   
   bnd1 = MaxBits(f) + (NumBits(n+1)+1)/2;

   if (!bnd || bnd1 < bnd)
      bnd = bnd1;

   i = n/2;
   while (!bit(LocalInfo.PossibleDegrees, i))
      i--;

   long lc_bnd = NumBits(LeadCoeff(f));

   long coeff_bnd = bnd + lc_bnd + i;

   long lift_bnd;

   lift_bnd = coeff_bnd + 15;  
   // +15 helps avoid trial divisions...can be any number >= 0

   lift_bnd = max(lift_bnd, bnd + lc_bnd + 2*NumBits(n) + ZZX_OVERLIFT);
   // facilitates "n-1" and "n-2" tests

   lift_bnd = max(lift_bnd, lc_bnd + NumBits(c1));
   // facilitates f(1) test

   lift_bnd += 2;
   // +2 needed to get inequalities right


   p = zz_p::modulus();

   e = long(double(lift_bnd)/(log(double(p))/log(double(2))));
   power(P, p, e);

   while (NumBits(P) <= lift_bnd) { 
      mul(P, P, p);
      e++;
   }

   if (verbose) {
      cerr << "lifting bound = " << lift_bnd << " bits.\n";
      cerr << "Hensel lifting to exponent " << e << "...\n";
      t = GetTime();
   }

   // third, compute f1 so that it is monic and equal to f mod P

   ZZX f1;

   if (LeadCoeff(f) == 1)
      f1 = f;
   else if (LeadCoeff(f) == -1)
      negate(f1, f);
   else {
      rem(t1, LeadCoeff(f), P);
      if (sign(P) < 0)
         Error("whoops!!!");
      InvMod(t1, t1, P);
      f1.rep.SetLength(n+1);
      for (i = 0; i <= n; i++) {
         mul(t2, f.rep[i], t1);
         rem(f1.rep[i], t2, P);
      }
   }


   // Do Hensel lift

   vec_ZZX w;

   MultiLift(w, *spfactors, f1, e, verbose);


   if (verbose) {
      t = GetTime()-t;
      cerr << "\nlifting time: ";
      cerr << t << "\n\n";
   }

   // We're done with zz_p...restore

   delete spfactors;
   bak.restore();

   // search for true factors

   if (verbose) {
      cerr << "searching for true factors...\n";
      t = GetTime();
   }

   if (ZZXFac_van_Hoeij && w.length() > van_hoeij_size_thresh)
      FindTrueFactors_vH(factors, f, w, P, p, e, 
                         LocalInfo, verbose, coeff_bnd);
   else
      FindTrueFactors(factors, f, w, P, LocalInfo, verbose, coeff_bnd);

   if (verbose) {
      t = GetTime()-t;
      cerr << "factor search time " << t << "\n";
   }

   long r = factors.length();

   if (rev) {
      for (i = 0; i < r; i++) {
         inplace_rev(factors[i]);
         if (sign(LeadCoeff(factors[i])) < 0)
            negate(factors[i], factors[i]);
      }
   }

   if (xfac) {
      factors.SetLength(r+1);
      SetX(factors[r]);
      r++;
   }

   if (x1fac) {
      factors.SetLength(r+1);
      factors[r] = ZZX(1,1)-1;
      r++;
   }

   // that's it!!

   if (verbose) {
      cerr << "*** end SFFactor.  degree sequence:\n";
      for (i = 0; i < r; i++)
         cerr << deg(factors[i]) << " ";
      cerr << "\n";
   }
}



static 
long DeflationFactor(const ZZX& f)
{
   long n = deg(f);
   long m = 0;
   long i;

   for (i = 1; i <= n && m != 1; i++) {
      if (f.rep[i] != 0)
         m = GCD(m, i);
   }

   return m;
}

static
void inflate(ZZX& g, const ZZX& f, long m)
// input may not alias output
{
   long n = deg(f);
   long i;

   g = 0;
   for (i = n; i >= 0; i--) 
      SetCoeff(g, i*m, f.rep[i]);
}

static
void deflate(ZZX& g, const ZZX& f, long m)
// input may not alias output
{
   long n = deg(f);
   long i;

   g = 0;
   for (i = n; i >= 0; i -= m) 
      SetCoeff(g, i/m, f.rep[i]);
}

static
void MakeFacList(vec_long& v, long m)
{
   if (m <= 0) Error("internal error: MakeFacList");

   v.SetLength(0);

   long p = 2;
   while (m > 1) {
      while (m % p == 0)  {
         append(v, p);
         m = m / p;
      }

      p++;
   }
}

long ZZXFac_PowerHack = 1;

void SFFactor(vec_ZZX& factors, const ZZX& ff, 
              long verbose,
              long bnd)

// input is primitive and square-free, with positive leading
// coefficient

{
   if (ff == 0) 
      Error("SFFactor: bad args");

   if (deg(ff) <= 0) {
      factors.SetLength(0);
      return;
   }


   if (!ZZXFac_PowerHack) {
      ok_to_abandon = 0;
      ll_SFFactor(factors, ff, verbose, bnd);
      return;
   }

   long m = DeflationFactor(ff);

   if (m == 1) {
      if (verbose) {
         cerr << "SFFactor -- no deflation\n";
      }

      ok_to_abandon = 0;
      ll_SFFactor(factors, ff, verbose, bnd);
      return;
   }


   vec_long v;
   MakeFacList(v, m);
   long l = v.length();

   if (verbose) {
      cerr << "SFFactor -- deflation: " << v << "\n";
   }

   vec_ZZX res;
   res.SetLength(1);
   deflate(res[0], ff, m);

   long done;
   long j, k;

   done = 0;
   k = l-1;

   while (!done) {
      vec_ZZX res1;
      res1.SetLength(0);
      for (j = 0; j < res.length(); j++) {
         vec_ZZX res2;
         double t;
         if (verbose) {
            cerr << "begin - step " << k << ", " << j << "; deg = " 
                 << deg(res[j]) << "\n";
            t = GetTime();
         }

         if (k < 0)
            ok_to_abandon = 0;
         else
            ok_to_abandon = 1;

         ll_SFFactor(res2, res[j], verbose, k < 0 ? bnd : 0);

         if (verbose) {
            t = GetTime()-t;
            cerr << "end   - step " << k << ", " << j << "; time = "
                 << t << "\n\n";
         }

         append(res1, res2);
      }

      if (k < 0) {
         done = 1;
         swap(res, res1);
      }
      else {
         vec_ZZX res2;
         res2.SetLength(res1.length());
         for (j = 0; j < res1.length(); j++)
            inflate(res2[j], res1[j], v[k]);
         k--;
         swap(res, res2);
      }
   }

   factors = res;
}





void factor(ZZ& c,
            vec_pair_ZZX_long& factors,
            const ZZX& f,
            long verbose,
            long bnd)

{
   ZZX ff = f;

   if (deg(ff) <= 0) {
      c = ConstTerm(ff);
      factors.SetLength(0);
      return;
   }

   content(c, ff);
   divide(ff, ff, c);

   long bnd1 = MaxBits(ff) + (NumBits(deg(ff)+1)+1)/2;
   if (!bnd || bnd > bnd1)
      bnd = bnd1;

   vec_pair_ZZX_long sfd;

   double t;

   if (verbose) { cerr << "square-free decomposition..."; t = GetTime(); }
   SquareFreeDecomp(sfd, ff);
   if (verbose) cerr << (GetTime()-t) << "\n";

   factors.SetLength(0);

   vec_ZZX x;

   long i, j;

   for (i = 0; i < sfd.length(); i++) {
      if (verbose) {
         cerr << "factoring multiplicity " << sfd[i].b
              << ", deg = " << deg(sfd[i].a) << "\n";
         t = GetTime();
      }

      SFFactor(x, sfd[i].a, verbose, bnd);

      if (verbose) {
         t = GetTime()-t;
         cerr << "total time for multiplicity " 
              << sfd[i].b << ": " << t << "\n";
      }

      for (j = 0; j < x.length(); j++)
         append(factors, cons(x[j], sfd[i].b));
   }
}

NTL_END_IMPL


syntax highlighted by Code2HTML, v. 0.9.1