/* The 'P+1' algorithm. Copyright 2002, 2003, 2004, 2005 Paul Zimmermann and Alexander Kruppa. This file is part of the ECM Library. The ECM Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The ECM Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the ECM Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. References: A p+1 Method of Factoring, H. C. Williams, Mathematics of Computation, volume 39, number 159, pages 225-234, 1982. Evaluating recurrences of form X_{m+n} = f(X_m, X_n, X_{m-n}) via Lucas chains, Peter L. Montgomery, December 1983, revised January 1992. */ #include #include #include "ecm-impl.h" #if HAVE_LIMITS_H # include #else # ifndef ULONG_MAX # define ULONG_MAX __GMP_ULONG_MAX # endif #endif /****************************************************************************** * * * Stage 1 * * * ******************************************************************************/ /* prime powers are accumulated up to about n^L1 */ #define L1 1 /* P1 <- V_e(P0), using P, Q as auxiliary variables, where V_{2k}(P0) = V_k(P0)^2 - 2 V_{2k-1}(P0) = V_k(P0)*V_{k-1}(P0) - P0. (More generally V_{m+n} = V_m * V_n - V_{m-n}.) Warning: P1 and P0 may be equal. */ static void pp1_mul (mpres_t P1, mpres_t P0, mpz_t e, mpmod_t n, mpres_t P, mpres_t Q) { mp_size_t size_e; unsigned long i; int sign; sign = mpz_sgn (e); mpz_abs (e, e); if (sign == 0) { mpres_set_ui (P1, 2, n); goto unnegate; } if (mpz_cmp_ui (e, 1) == 0) { mpres_set (P1, P0, n); goto unnegate; } /* now e >= 2 */ mpz_sub_ui (e, e, 1); mpres_mul (P, P0, P0, n); mpres_sub_ui (P, P, 2, n); /* P = V_2(P0) = P0^2-2 */ mpres_set (Q, P0, n); /* Q = V_1(P0) = P0 */ /* invariant: (P, Q) = (V_{k+1}(P0), V_k(P0)), start with k=1 */ size_e = mpz_sizeinbase (e, 2); for (i = size_e - 1; i > 0;) { if (mpz_tstbit (e, --i)) /* k -> 2k+1 */ { if (i) /* Q is not needed for last iteration */ { mpres_mul (Q, P, Q, n); mpres_sub (Q, Q, P0, n); } mpres_mul (P, P, P, n); mpres_sub_ui (P, P, 2, n); } else /* k -> 2k */ { mpres_mul (P, P, Q, n); mpres_sub (P, P, P0, n); if (i) /* Q is not needed for last iteration */ { mpres_mul (Q, Q, Q, n); mpres_sub_ui (Q, Q, 2, n); } } } mpres_set (P1, P, n); mpz_add_ui (e, e, 1); /* recover original value of e */ unnegate: if (sign == -1) mpz_neg (e, e); return; } /* Input: P0 is the initial point (sigma) n is the number to factor B1 is the stage 1 bound B1done: stage 1 was already done up to that limit go: if <> 1, group order to preload Output: a is the factor found, or the value at end of stage 1 Return value: non-zero iff a factor was found. */ static int pp1_stage1 (mpz_t f, mpres_t P0, mpmod_t n, double B1, double *B1done, mpz_t go, int (*stop_asap)(void)) { double B0, p, q, r; mpz_t g; mpres_t P, Q; mpres_t R, S, T; int youpi = ECM_NO_FACTOR_FOUND; unsigned int max_size, size_n; mpz_init (g); mpres_init (P, n); mpres_init (Q, n); mpres_init (R, n); mpres_init (S, n); mpres_init (T, n); B0 = ceil (sqrt (B1)); size_n = mpz_sizeinbase (n->orig_modulus, 2); max_size = L1 * size_n; if (mpz_cmp_ui (go, 1) > 0) pp1_mul (P0, P0, go, n, P, Q); /* suggestion from Peter Montgomery: start with exponent n^2-1, as factors of Lucas and Fibonacci number are either +/-1 (mod index), and so is n. Therefore, index will appear as a factor of n^2-1 and be included in stage 1. Do this only when n is composite, otherwise all tests with prime n factor of a Cunningham number will succeed in stage 1. As in P-1, for small overhead, use that trick only when lg(n) <= sqrt(B1). */ if ((double) size_n <= B0 && mpz_probab_prime_p (n->orig_modulus, PROBAB_PRIME_TESTS) == 0) { mpz_mul (g, n->orig_modulus, n->orig_modulus); mpz_sub_ui (g, g, 1); pp1_mul (P0, P0, g, n, P, Q); } mpz_set_ui (g, 1); /* first loop through small primes <= sqrt(B1) */ for (p = 2.0; p <= B0; p = getprime (p)) { for (q = 1, r = p; r <= B1; r *= p) if (r > *B1done) q *= p; mpz_mul_d (g, g, q, Q); if (mpz_sizeinbase (g, 2) >= max_size) { pp1_mul (P0, P0, g, n, P, Q); mpz_set_ui (g, 1); if (stop_asap != NULL && (*stop_asap) ()) { outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p); *B1done = p; goto clear_and_exit; } } } pp1_mul (P0, P0, g, n, P, Q); /* then all primes > sqrt(B1) and taken with exponent 1 */ for (; p <= B1; p = getprime (p)) { if (p > *B1done) pp1_mul_prac (P0, (unsigned long) p, n, P, Q, R, S, T); if (stop_asap != NULL && (*stop_asap) ()) { outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p); *B1done = p; goto clear_and_exit; } } *B1done = B1; mpres_sub_ui (P, P0, 2, n); #ifndef FULL_REDUCTION mpres_normalize (P); /* needed for gcd */ #endif mpres_gcd (f, P, n); youpi = mpz_cmp_ui (f, 1); clear_and_exit: getprime (FREE_PRIME_TABLE); /* free the prime tables, and reinitialize */ mpres_clear (Q, n); mpres_clear (R, n); mpres_clear (S, n); mpres_clear (T, n); mpz_clear (g); mpres_clear (P, n); return youpi; } /* checks if the factor p was found by P+1 or P-1 (when prime). a is the initial seed. */ static void pp1_check_factor (mpz_t a, mpz_t p) { if (mpz_probab_prime_p (p, PROBAB_PRIME_TESTS)) { mpz_mul (a, a, a); mpz_sub_ui (a, a, 4); if (mpz_jacobi (a, p) == 1) outputf (OUTPUT_NORMAL, "[factor found by P-1]\n"); } } /****************************************************************************** * * * Stage 2 * * * ******************************************************************************/ /* let alpha, beta be the roots of x^2-Px+1=0 set a, b such that alpha^e = a*alpha+b (idem for beta), i.e. a*x+b = rem(x^e, x^2-Px+1) */ static void pp1_mul2 (mpres_t a, mpres_t b, mpres_t P, mpz_t e, mpmod_t n) { unsigned long l; mpres_t t; if (mpz_cmp_ui (e, 0) == 0) /* x^0 = 1 */ { mpres_set_ui (a, 0, n); mpres_set_ui (b, 1, n); return; } /* now e >= 1 */ mpres_set_ui (a, 1, n); mpres_set_ui (b, 0, n); l = mpz_sizeinbase (e, 2) - 1; /* number of bits of e (minus 1) */ mpres_init (t, n); while (l--) { /* square: (ax+b)^2 = (a^2P+2ab) x + (b^2-a^2) */ mpres_mul (t, a, a, n); /* a^2 */ mpres_mul (a, a, b, n); mpres_mul_ui (a, a, 2, n); /* 2ab */ mpres_mul (b, b, b, n); /* b^2 */ mpres_sub (b, b, t, n); /* b^2-a^2 */ mpres_mul (t, t, P, n); /* a^2P */ mpres_add (a, t, a, n); /* a^2P+2ab */ if (mpz_tstbit (e, l)) /* multiply: (ax+b)*x = (aP+b) x - a */ { mpres_mul (t, a, P, n); mpres_add (t, t, b, n); mpres_neg (b, a, n); mpz_swap (a, t); } } mpres_clear (t, n); } /* Performs the following: for (i=0;id1, root_params->d2, dF); mpres_init (u, modulus); mpres_init (v, modulus); if (ABS(root_params->S) == 1) /* special code with d1/6 muls */ { mpres_init (fd[0], modulus); mpres_init (fd[1], modulus); mpres_init (fd[2], modulus); mpz_set_ui (*t, root_params->d2); pp1_mul (fd[2], *x, *t, modulus, u, v); mpres_get_z (F[0], fd[2], modulus); mpz_set_ui (*t, 7); pp1_mul (fd[0], fd[2], *t, modulus, u, v); mpz_set_ui (*t, 6); pp1_mul (fd[1], fd[2], *t, modulus, u, v); /* fd[0] = V_{7*d2}(P), fd[1] = V_{6*d2}(P), fd[2] = V_{d2}(P) */ outputf (OUTPUT_VERBOSE, "Initializing table of differences for F took %ldms\n", elltime (st1, cputime ())); i = 1; j = 7; while (i < dF) { if (gcd (j, root_params->d1) == 1) /* (d2,d1) == 1 ==> (j*d2,d1) == (j,d1) */ mpres_get_z (F[i++], fd[0], modulus); /* V_{m+n} = V_m * V_n - V_{m-n} */ /* fd[0] = V_m, fd[1] = V_n, fd[2] = V_{m-n} */ mpres_swap (fd[0], fd[2], modulus); mpres_mul (u, fd[2], fd[1], modulus); mpres_sub (fd[0], u, fd[0], modulus); /* fd[0] = V_{m+n}, fd[1] = V_n, fd[2] = V_m */ j += 6; muls ++; } mpres_clear (fd[0], modulus); mpres_clear (fd[1], modulus); mpres_clear (fd[2], modulus); } else /* case |S| <> 1: this code works also for S=1, but is more expensive, since it can use up to 4*(d1/6) muls */ { init_roots_state (&state, root_params->S, root_params->d1, root_params->d2, 1.0); mpz_set_ui (*t, 0); coeffs = init_progression_coeffs (*t, state.dsieve, root_params->d2, 1, 6, state.S, state.dickson_a); if (coeffs == NULL) return ECM_ERROR; state.fd = (point *) malloc (state.size_fd * sizeof (point)); if (state.fd == NULL) { clear_list (coeffs, state.size_fd); return ECM_ERROR; } for (i = 0; i < state.size_fd; i++) { mpres_init (state.fd[i].x, modulus); mpres_init (state.fd[i].y, modulus); /* if i = k*(S+1) + S for k>=1, we can copy x and y from i - (S+1) */ if (i > state.S && (i % (state.S + 1) == state.S)) { mpres_set (state.fd[i].x, state.fd[state.S].x, modulus); mpres_set (state.fd[i].y, state.fd[state.S].y, modulus); } else pp1_mul2 (state.fd[i].x, state.fd[i].y, x[0], coeffs[i], modulus); } clear_list (coeffs, state.size_fd); outputf (OUTPUT_VERBOSE, "Initializing table of differences for F took %ldms\n", elltime (st1, cputime ())); /* Now for the actual calculation of the roots. */ for (i = 0; i < dF && !youpi;) { /* Is this a rsieve value where we computed Dickson(j * d2) * X? */ if (gcd (state.rsieve, state.dsieve) == 1) { /* Did we use every progression since the last update? */ if (state.next == state.nr) { /* Yes, time to update again */ addWnm (state.fd, x[0], modulus, state.nr, state.S, &muls); state.next = 0; } /* Is this a j value where we want Dickson(j*d2)*X as a root? */ if (gcd (state.rsieve, root_params->d1) == 1) { /* we have alpha^k = x * alpha + y thus alpha^k + beta^k = x * P + 2 * y. FIXME: can we avoid returning to the Lucas form? */ mpres_mul (u, state.fd[state.next * (state.S + 1)].x, x[0], modulus); mpres_mul_ui (v, state.fd[state.next * (state.S + 1)].y, 2, modulus); mpres_add (u, u, v, modulus); mpres_get_z (F[i++], u, modulus); } state.next ++; } state.rsieve += 6; } for (i = 0; i < state.size_fd; i++) { mpres_clear (state.fd[i].x, modulus); mpres_clear (state.fd[i].y, modulus); } free (state.fd); } mpres_clear (u, modulus); mpres_clear (v, modulus); outputf (OUTPUT_VERBOSE, "Computing roots of F took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, " and %d muls", muls); outputf (OUTPUT_VERBOSE, "\n"); return youpi; } /* return NULL if an error occurred */ pp1_roots_state * pp1_rootsG_init (mpres_t *x, root_params_t *root_params, mpmod_t modulus) { mpres_t P; pp1_roots_state *state; unsigned long i; ASSERT (gcd (root_params->d1, root_params->d2) == 1); state = (pp1_roots_state *) malloc (sizeof (pp1_roots_state)); if (state == NULL) return NULL; /* we don't need the sign anymore after pp1_rootsG_init */ state->S = ABS(root_params->S); if (state->S == 1) { mpz_t t; mpz_init (t); mpres_init (P, modulus); for (i = 0; i < 4; i++) mpres_init (state->tmp[i], modulus); state->dsieve = root_params->d2; /* needed in pp1_rootsG */ /* We want to skip values where gcd((i0 + i) * d1, d2) != 1. We can test for gcd(i0 + i, d2) instead and let pp1_rootsG() advance state->rsieve in steps of 1 */ /* state->rsieve = i0 % d2 */ state->rsieve = mpz_fdiv_ui (root_params->i0, root_params->d2); outputf (OUTPUT_DEVVERBOSE, "pp1_rootsG_init: i0 = %Zd, state: " "dsieve = %d, rsieve = %d, S = %d\n", root_params->i0, state->dsieve, state->rsieve, state->S); mpz_set_ui (t, root_params->d1); pp1_mul (state->tmp[1], *x, t, modulus, state->tmp[3], P); pp1_mul (state->tmp[0], state->tmp[1], root_params->i0, modulus, state->tmp[3], P); mpz_sub_ui (t, root_params->i0, 1); mpz_abs (t, t); pp1_mul (state->tmp[2], state->tmp[1], t, modulus, state->tmp[3], P); /* for P+1, tmp[0] = V_s(P), tmp[1] = V_d1(P), tmp[2] = V_{|s-d1|}(P) */ mpres_clear (P, modulus); mpz_clear (t); } else { listz_t coeffs; state->dickson_a = (root_params->S < 0) ? -1 : 0; state->nr = (root_params->d2 > 1) ? root_params->d2 - 1 : 1; state->size_fd = state->nr * (state->S + 1); state->next = 0; state->dsieve = 1; state->rsieve = 1; state->fd = (point *) malloc (state->size_fd * sizeof (point)); if (state->fd == NULL) { free (state); return NULL; } coeffs = init_progression_coeffs (root_params->i0, root_params->d2, root_params->d1, 1, 1, state->S, state->dickson_a); if (coeffs == NULL) { free (state->fd); free (state); return NULL; } for (i = 0; i < state->size_fd; i++) { mpres_init (state->fd[i].x, modulus); mpres_init (state->fd[i].y, modulus); /* The S-th coeff of all progressions is identical */ if (i > state->S && i % (state->S + 1) == state->S) { /* Simply copy from the first progression */ mpres_set (state->fd[i].x, state->fd[state->S].x, modulus); mpres_set (state->fd[i].y, state->fd[state->S].y, modulus); } else pp1_mul2 (state->fd[i].x, state->fd[i].y, x[0], coeffs[i], modulus); } clear_list (coeffs, state->size_fd); } return state; } void pp1_rootsG_clear (pp1_roots_state *state, ATTRIBUTE_UNUSED mpmod_t modulus) { unsigned long i; if (state->S == 1) { for (i = 0; i < 4; i++) mpres_clear (state->tmp[i], modulus); } else { for (i = 0; i < state->size_fd; i++) { mpres_clear (state->fd[i].x, modulus); mpres_clear (state->fd[i].y, modulus); } free (state->fd); } free (state); } int pp1_rootsG (listz_t G, unsigned long dF, pp1_roots_state *state, mpmod_t modulus, mpres_t *x) { unsigned long i; unsigned long muls = 0; long st; st = cputime (); /* state->S is positive: we don't need the sign anymore, since the polynomial is defined by the table of differences */ if (state->S == 1) { for (i = 0; i < dF;) { if (gcd (state->rsieve, state->dsieve) == 1) { outputf (OUTPUT_TRACE, "pp1_rootsG: Taking root G[%d], rsieve = %d\n", i, state->rsieve); mpres_get_z (G[i++], state->tmp[0], modulus); } else { outputf (OUTPUT_TRACE, "pp1_rootsG: NOT taking root, rsieve = %d, gcd = %d\n", state->rsieve, gcd (state->rsieve, state->dsieve)); } mpres_swap (state->tmp[0], state->tmp[2], modulus); mpres_mul (state->tmp[3], state->tmp[2], state->tmp[1], modulus); mpres_sub (state->tmp[0], state->tmp[3], state->tmp[0], modulus); state->rsieve++; } } else { mpres_t u, v; mpres_init (u, modulus); mpres_init (v, modulus); for (i = 0; i < dF;) { /* Did we use every progression since the last update? */ if (state->next == state->nr) { /* Yes, time to update again */ addWnm (state->fd, x[0], modulus, state->nr, state->S, &muls); state->next = 0; } /* Is this a root we should skip? (Take only if gcd == 1) */ if (gcd (state->rsieve, state->dsieve) == 1) { mpres_mul (u, state->fd[state->next * (state->S + 1)].x, x[0], modulus); mpres_mul_ui (v, state->fd[state->next * (state->S + 1)].y, 2, modulus); mpres_add (u, u, v, modulus); mpres_get_z (G[i++], u, modulus); } state->next ++; state->rsieve ++; } mpres_clear (u, modulus); mpres_clear (v, modulus); } outputf (OUTPUT_VERBOSE, "Computing roots of G took %ldms", elltime (st, cputime ())); outputf (OUTPUT_DEVVERBOSE, ", %lu muls", dF); outputf (OUTPUT_VERBOSE, "\n"); return ECM_NO_FACTOR_FOUND; } /****************************************************************************** * * * Williams P+1 * * * ******************************************************************************/ /* Input: p is the initial generator (sigma), if 0 generate it at random. n is the number to factor B1 is the stage 1 bound B2 is the stage 2 bound k is the number of blocks for stage 2 verbose is the verbosity level Output: p is the factor found Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2) */ int pp1 (mpz_t f, mpz_t p, mpz_t n, mpz_t go, double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm, double B2scale, unsigned long k, const int S, int verbose, int repr, int use_ntt, FILE *os, FILE *es, char *TreeFilename, double maxmem, gmp_randstate_t rng, int (*stop_asap)(void)) { int youpi = ECM_NO_FACTOR_FOUND; int po2 = 0; /* Whether we should use power-of-2 poly degree */ long st; mpres_t a; mpmod_t modulus; mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */ unsigned long dF; root_params_t root_params; set_verbose (verbose); ECM_STDOUT = (os == NULL) ? stdout : os; ECM_STDERR = (es == NULL) ? stdout : es; /* if n is even, return 2 */ if (mpz_divisible_2exp_p (n, 1)) { mpz_set_ui (f, 2); return ECM_FACTOR_FOUND_STEP1; } st = cputime (); if (mpz_cmp_ui (p, 0) == 0) pm1_random_seed (p, n, rng); mpz_init_set (B2min, B2min_parm); mpz_init_set (B2, B2_parm); mpz_init (root_params.i0); /* Set default B2. See ecm.c for comments */ if (ECM_IS_DEFAULT_B2(B2)) mpz_set_d (B2, B2scale * pow (B1 * PP1_COST, DEFAULT_B2_EXPONENT)); /* set B2min */ if (mpz_sgn (B2min) < 0) mpz_set_d (B2min, B1); if (repr == ECM_MOD_MPZ) mpmod_init_MPZ (modulus, n); else if (repr == ECM_MOD_MODMULN) mpmod_init_MODMULN (modulus, n); else if (repr == ECM_MOD_REDC) mpmod_init_REDC (modulus, n); else if (abs (repr) > 16) { if (mpmod_init_BASE2 (modulus, repr, n) == ECM_ERROR) return ECM_ERROR; } else /* automatic choice */ mpmod_init (modulus, n, repr); if (use_ntt) po2 = 1; if (bestD (&root_params, &k, &dF, B2min, B2, po2, use_ntt, maxmem, (TreeFilename != NULL), modulus) == ECM_ERROR) { youpi = ECM_ERROR; goto clear_and_exit; } /* Set default degree for Brent-Suyama extension */ root_params.S = S; if (root_params.S == ECM_DEFAULT_S) { if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0) { /* For Fermat numbers, default is 1 (no Brent-Suyama) */ root_params.S = 1; } else { mpz_t t; mpz_init (t); mpz_sub (t, B2, B2min); root_params.S = choose_S (t); mpz_clear (t); } } if (test_verbose (OUTPUT_NORMAL)) { outputf (OUTPUT_NORMAL, "Using "); if (ECM_IS_DEFAULT_B1_DONE(*B1done)) outputf (OUTPUT_NORMAL, "B1=%1.0f", B1); else outputf (OUTPUT_NORMAL, "B1=%1.0f-%1.0f", *B1done, B1); if (mpz_cmp_d (B2min, B1) == 0) outputf (OUTPUT_NORMAL, ", B2=%Zd", B2); else outputf (OUTPUT_NORMAL, ", B2=%Zd-%Zd", B2min, B2); if (root_params.S > 0) outputf (OUTPUT_NORMAL, ", polynomial x^%u", root_params.S); else outputf (OUTPUT_NORMAL, ", polynomial Dickson(%u)", -root_params.S); /* don't print x0 in resume case */ if (ECM_IS_DEFAULT_B1_DONE(*B1done)) outputf (OUTPUT_NORMAL, ", x0=%Zd", p); outputf (OUTPUT_NORMAL, "\n"); } outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n", dF, k, root_params.d1, root_params.d2, root_params.i0); mpres_init (a, modulus); mpres_set_z (a, p, modulus); /* since pp1_mul_prac takes an unsigned long, we have to check that B1 <= MAX_ULONG */ if (B1 > (double) ULONG_MAX) { outputf (OUTPUT_ERROR, "Error, maximal step1 bound for P+1 is %lu\n", ULONG_MAX); youpi = ECM_ERROR; goto clear_and_exit; } if (B1 > *B1done) youpi = pp1_stage1 (f, a, modulus, B1, B1done, go, stop_asap); outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ())); if (test_verbose (OUTPUT_RESVERBOSE)) { mpz_t t; mpz_init (t); mpres_get_z (t, a, modulus); outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t); mpz_clear (t); } mpres_get_z (p, a, modulus); if (stop_asap != NULL && (*stop_asap) ()) goto clear_and_exit; if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0) youpi = stage2 (f, &a, modulus, dF, k, &root_params, ECM_PP1, use_ntt, TreeFilename, stop_asap); if (youpi > 0 && test_verbose (OUTPUT_NORMAL)) pp1_check_factor (p, f); /* tell user if factor was found by P-1 */ clear_and_exit: mpres_clear (a, modulus); mpmod_clear (modulus); mpz_clear (root_params.i0); mpz_clear (B2); mpz_clear (B2min); return youpi; }