Main Page Reference Manual Compound List File List
libecc/polynomial.h

00001 //
00012 //
00013 // This file is part of the libecc package.
00014 // Copyright (C) 2002 - 2004 by
00015 //
00016 // Carlo Wood, Run on IRC <carlo@alinoe.com>
00017 // RSA-1024 0x624ACAD5 1997-01-26                    Sign & Encrypt
00018 // Fingerprint16 = 32 EC A7 B6 AC DB 65 A6  F6 F6 55 DD 1C DC FF 61
00019 //
00020 // This program is free software; you can redistribute it and/or
00021 // modify it under the terms of the GNU General Public License
00022 // as published by the Free Software Foundation; either version 2
00023 // of the License, or (at your option) any later version.
00024 //
00025 // This program is distributed in the hope that it will be useful,
00026 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00027 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00028 // GNU General Public License for more details.
00029 //
00030 // You should have received a copy of the GNU General Public License
00031 // along with this program; if not, write to the Free Software
00032 // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00033 //
00034 
00035 #ifndef LIBECC_POLYNOMIAL_H
00036 #define LIBECC_POLYNOMIAL_H
00037 
00038 #include <stdexcept>
00039 #include <libecc/bitset.h>
00040 #include <libecc/debug.h>
00041 #if ECC_DEBUGOUTPUT
00042 #include <libcwd/cwprint.h>
00043 #endif
00044 
// Build-time switches, keyed on ECC_DEBUG.
//
// LIBECC_AUGMENTED selects which print_matrix() overload class polynomial
// declares in debug builds (rows of bitset<2 * m> when set, bitset<m> otherwise).
// NOTE(review): the leading `1 ||' in the debug definitions forces the value to 1
// regardless of the right-hand operand; presumably a hand-edited toggle -- confirm.
#if ECC_DEBUG
#define LIBECC_AUGMENTED 1
#define LIBECC_INPLACE (1 || !LIBECC_AUGMENTED)
#define LIBECC_SWAPCOLUMNS (1 || LIBECC_INPLACE)
#else
// Don't change these.
#define LIBECC_AUGMENTED 0
#define LIBECC_INPLACE 1
#define LIBECC_SWAPCOLUMNS 1
#endif
00055 
namespace libecc {
00057 
00058 // Forward declarations.
// Forward declaration of the class template, followed by the free operator
// templates that the class later befriends through `operator@ <>' specializations.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  class polynomial;

// Field multiplication and division.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  polynomial<m, k, k1, k2> operator*(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  polynomial<m, k, k1, k2> operator/(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);

// Comparison.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  bool operator==(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  bool operator!=(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);

// Stream output of a polynomial and of an xor expression.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  std::ostream& operator<<(std::ostream&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  std::ostream& operator<<(std::ostream&, typename polynomial<m, k, k1, k2>::xor_type const&);

// Addition and subtraction; both yield an xor expression rather than a polynomial.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  typename polynomial<m, k, k1, k2>::xor_type operator+(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  typename polynomial<m, k, k1, k2>::xor_type operator-(polynomial<m, k, k1, k2> const&, polynomial<m, k, k1, k2> const&);
00077 
00091 template<unsigned int m, unsigned int k, unsigned int k1 = 0, unsigned int k2 = 0>
00092   classpolynomial {
00093     public:
00097       typedef Operator::bitsetExpression<m, false, false, Operator::bitsetXOR> xor_type;
00098 
00099       // Fix this if you add members in front of M_coefficients.
00100       static size_t const offsetof_vector = bitset<m>::offsetof_vector;
00101 
00102     private:
00103       bitset<m> M_coefficients;
00104       static polynomial<m, k, k1, k2> const one;
00105       static bool S_normal_initialized;
00106       static bitset<m> S_normal;
00107 
00108     public:
00112       static polynomial const& unity(void) { return one; }
00113 
00114     public:
00118       polynomial(void) { }
00119 
00123       explicit polynomial(bitset_digit_t coefficients) : M_coefficients(coefficients) { }
00124 
00128       polynomial(polynomial const& p) : M_coefficients(p.M_coefficients) { }
00129 
00133       explicit polynomial(bitset<m> const& coefficients) : M_coefficients(coefficients) { }
00134 
00138       polynomial(std::string const& coefficients) : M_coefficients(coefficients) { }
00139 
00180       polynomial(xor_type const& expression) : M_coefficients(expression) { }
00181 
00185       polynomial& operator=(polynomial const& p) { M_coefficients = p.M_coefficients; return *this; }
00186 
00190       polynomial& operator=(bitset<m> const& coefficients) { M_coefficients = coefficients; return *this; }
00191 
00196       polynomial& operator=(xor_type const& expression);
00197 
00201       polynomial(polynomial const& b, polynomial const& c);
00202 
00206       static unsigned int const square_digits = 2 * bitset_base<m>::digits + 4;
00207 
00223       polynomial& square(bitset_digit_t* tmpbuf) const; // tmpbuf must be an array of `square_digits' bitset_digit_t.
00224 
00232       bool sqrt(void);
00233 
00234       // The field arithmetic is implemented in terms of operations on the bits.
00238       polynomial& operator+=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00239 
00243       polynomial& operator-=(polynomial const& p) { M_coefficients ^= p.M_coefficients; return *this; }
00244 
00248       polynomial& operator*=(polynomial const& p);
00249 #ifdef LIBECC_DOXYGEN
00250       // Stupid doxygen.
00262       polynomial& operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00263 #else
00264       // The real prototype.
00265       polynomial& operator*=(xor_type const& expr);
00266 #endif
00267 
00271       polynomial& operator/=(polynomial const& p);
00272 #ifdef LIBECC_DOXYGEN
00273       // Stupid doxygen.
00285       polynomial& operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr);
00286 #else
00287       // The real prototype.
00288       polynomial& operator/=(xor_type const& expr);
00289 #endif
00290 
00299       static bitset<m> const& normal(void) { if (!S_normal_initialized) calculate_normal(); return S_normal; }
00300 
00312       int trace(void) const
00313      {
00314         // This method was invented by me, so give me credit for it when you use it somewhere. Thank you.
00315         // Carlo Wood <carlo@alinoe.com> -- 4 December 2004.
00316         int tr = 0;
00317         if ((m & 1))
00318           tr = M_coefficients.template test<0>();
00319         if (((m - k) & 1))
00320           tr ^= M_coefficients.template test<m - k>();
00321         if (k1)
00322         {
00323           if (((m - k1) & 1))
00324             tr ^= M_coefficients.template test<m - k1>();
00325           if (((m - k2) & 1))
00326             tr ^= M_coefficients.template test<m - k2>();
00327         }
00328         return tr;
00329       }
00330 
00363       friend xor_type operator+ <>(polynomial const& p1, polynomial const& p2);
00364 
00373       friend xor_type operator- <>(polynomial const& p1, polynomial const& p2);
00374 
00378       friend polynomial operator* <>(polynomial const& p1, polynomial const& p2);
00379 #ifdef LIBECC_DOXYGEN
00380       // Only added for documentational reasons.
00386       friend bool operator*(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00392       friend bool operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00393 #endif
00394 
00398       friend polynomial operator/ <>(polynomial const& p1, polynomial const& p2);
00399 #ifdef LIBECC_DOXYGEN
00400       // Only added for documentational reasons.
00406       friend bool operator/(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00412       friend bool operator/(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00413 #endif
00414 
00418       friend bool operator== <>(polynomial const& p1, polynomial const& p2);
00419 #ifdef LIBECC_DOXYGEN
00420       // Only added for documentational reasons.
00428       friend bool operator==(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00436       friend bool operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00437 #endif
00438 
00442       friend bool operator!= <>(polynomial const& p1, polynomial const& p2);
00443 #ifdef LIBECC_DOXYGEN
00444       // Only added for documentational reasons.
00452       friend bool operator!=(polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2);
00460       friend bool operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2>::xor_type const& expr);
00461 #endif
00462 
00468       friend std::ostream& operator<< <>(std::ostream& os, polynomial const& p);
00469 #ifdef LIBECC_DOXYGEN
00470       // Only added for documentational reasons.
00476       friend std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2>::xor_type const& expr);
00477 #endif
00478 
00482       bitset<m> const& get_bitset(void) const{ return M_coefficients; }
00483 
00487       bitset<m>& get_bitset(void) { return M_coefficients; }
00488 
00489     private:
00490       static void reduce(bitset_digit_t* buf);
00491       static bitset_digit_t reducea(bitset_digit_t* a);
00492       static void calculate_normal(void);
00493 
00494       void multiply_with(polynomial const& p1, bitset<m>& result) const;
00495 #if ECC_DEBUG
00496 #if LIBECC_AUGMENTED
00497       void print_matrix(bitset<2 * m> const* matrix, bitset<m> const& pivotted);
00498 #else
00499       void print_matrix(bitset<m> const* matrix, bitset<m> const& pivotted);
00500 #endif
00501 #endif
00502   };
00503 
00504 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00505   polynomial<m, k, k1, k2> const polynomial<m, k, k1, k2>::one(1);
00506 
00507 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00508   bool polynomial<m, k, k1, k2>::sqrt(void)
00509   {
00510     if (!k1)
00511     {
00512       bitset<m> highbits;
00513       highbits.reset();
00514 
00515       // First convert all odd powers into even powers
00516       if ((m & 1) == 1)
00517       {
00518         if ((k & 1) == 1)               // m and k are odd?
00519         {
00520           for(unsigned int bit = 1; bit < m; bit += 2)
00521           {
00522             if (M_coefficients.test(bit))
00523             {
00524               if (bit >= m - k)
00525                 highbits.flip(bit + k - m);
00526               else
00527                 M_coefficients.flip(bit + k);
00528               highbits.flip(bit);
00529             }
00530           }
00531         }
00532         else                    // m is odd and k is even
00533         {
00534           for(unsigned int bit = 1; bit < m; bit += 2)
00535           {
00536             if (M_coefficients.test(bit))
00537             {
00538               if (bit >= m - k)
00539               {
00540                 M_coefficients.flip(bit + 2 * k - m);
00541                 M_coefficients.flip(bit + k - m);
00542               }
00543               else
00544                 M_coefficients.flip(bit + k);
00545               highbits.flip(bit);
00546             }
00547           }
00548         }
00549       }
00550       else if ((k & 1) == 1)    // m is even and k is odd
00551       {
00552         for(unsigned int bit = 1; bit < m; bit += 2)
00553         {
00554           if (M_coefficients.test(bit))
00555           {
00556             if (bit < k)
00557             {
00558               M_coefficients.flip(bit + k);
00559               M_coefficients.flip(bit + m - k);
00560               highbits.flip(bit + m - k);
00561             }
00562             else
00563             {
00564               M_coefficients.flip(bit - k);
00565               highbits.flip(bit - k);
00566             }
00567           }
00568         }
00569       }
00570       else                      // m and k are both even (actually, this should never be used as reduction polynomial).
00571       {
00572         for(unsigned int bit = 1; bit < m; bit += 2)
00573           if (M_coefficients.test(bit))
00574             return false;               // This can't be a square
00575       }
00576 
00577       // Next handle the remaining even powers
00578       unsigned int bit_to = 1;
00579       for(unsigned int bit = 2; bit < m; bit += 2)
00580       {
00581         if (M_coefficients.test(bit))
00582           M_coefficients.set(bit_to);
00583         else
00584           M_coefficients.clear(bit_to);
00585         ++bit_to;
00586       }
00587       for(unsigned int bit = m % 2; bit < m; bit += 2)
00588       {
00589         if (highbits.test(bit))
00590           M_coefficients.set(bit_to);
00591         else
00592           M_coefficients.clear(bit_to);
00593         ++bit_to;
00594       }
00595     }
00596     else
00597     {
00598       structRoot {
00599         polynomial<m, k, k1, k2> value;
00600         Root(polynomial<m, k, k1, k2> const& p) : value(p)
00601         {
00602           bitset_digit_t p2buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00603           polynomial<m, k, k1, k2>& p2 = value.square(p2buf);
00604           bitset_digit_t p4buf[libecc::polynomial<m, k, k1, k2>::square_digits];
00605           polynomial<m, k, k1, k2>& p4 = p2.square(p4buf);
00606           for (unsigned int i = 1; i < m / 2; ++i)
00607           {
00608             p4.square(p2buf);
00609             p2.square(p4buf);
00610           }
00611           value = (m % 2 == 0) ? p2 : p4;
00612         }
00613       };
00614       static Root const root_of_t(polynomial<m, k, k1, k2>(2));
00615       polynomial<m, k, k1, k2> tmp(0);
00616       bitset<m> tmp2;
00617       tmp2.reset();
00618       for(unsigned int bit = 0; bit < m / 2; ++bit)
00619       {
00620         if (M_coefficients.test(2 * bit))
00621           tmp2.set(bit);
00622         if (M_coefficients.test(2 * bit + 1))
00623           tmp.get_bitset().set(bit);
00624       }
00625       if (m % 2 == 1 && M_coefficients.test(m - 1))
00626         tmp2.set(m / 2);
00627       M_coefficients = tmp2;
00628       *this += tmp * root_of_t.value;
00629     }
00630     return true;
00631   }
00632 
00633 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00634   inline polynomial<m, k, k1, k2>&
00635   polynomial<m, k, k1, k2>::operator*=(polynomial const& p)
00636   {
00637     multiply_with(p, M_coefficients);
00638     return *this;
00639   }
00640 
00641 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00642   inline polynomial<m, k, k1, k2>&
00643   polynomial<m, k, k1, k2>::operator*=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
00644   {
00645     return (*this *= polynomial<m, k, k1, k2>(expr));
00646   }
00647 
00648 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00649   inline polynomial<m, k, k1, k2>&
00650   polynomial<m, k, k1, k2>::operator=(xor_type const& expression)
00651   {
00652     M_coefficients = expression;
00653     return *this;
00654   }
00655 
00656 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00657   void
00658   polynomial<m, k, k1, k2>::multiply_with(polynomial const& p1, bitset<m>& result) const
00659  {
00660     bitset_digit_t output[bitset<m>::digits * 2] __attribute__ ((aligned (8)));
00661 
00662     // Find the first non-zero digit in the input polynomial of this object.
00663     unsigned int digit = 0;
00664     while(M_coefficients.digit(digit) == 0)             // Still zero?
00665     {
00666       output[digit] = 0;                                // That means that the output will end on zero too.
00667       if (++digit == bitset<m>::digits)
00668       {
00669         result.reset();                                 // The whole polynomial is zero, the result will be zero too.
00670         return;
00671       }
00672     }
00673     unsigned int uninitialized_digit = digit;           // The next digit of `output' that has not yet been initialized.
00674     // Find the first digit in the input polynomial of this object whose first bit is set.
00675     for(; digit < bitset<m>::digits; ++digit)
00676     {
00677       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00678       {
00679         // Set the output to p1 times this bit.
00680         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00681           output[d + digit] = p1.get_bitset().digit(d);
00682         uninitialized_digit = bitset<m>::digits + digit;
00683         ++digit;                                        // Set to the next input digit.
00684         break;
00685       }
00686       output[digit] = 0;                                // Initialize this digit of the output to 0.
00687       ++uninitialized_digit;
00688     }
00689     // Set the remaining digits to zero, if any.
00690     for(unsigned int remaining_digit = uninitialized_digit; remaining_digit < sizeof(output) / sizeof(bitset_digit_t); ++remaining_digit)
00691       output[remaining_digit] = 0;
00692     // Find for the remaining input digits the ones that have their first bit set.
00693     for(; digit < bitset<m>::digits; ++digit)
00694       if ((M_coefficients.digit(digit) & 1))            // Is the first bit set?
00695       {
00696         // Add p1 times this bit to the output.
00697         for (unsigned int d = 0; d < bitset<m>::digits; ++d)
00698           output[d + digit] ^= p1.get_bitset().digit(d);
00699       }
00700     // Create a bitset that will contain p1, shifted at most bitset_digit_bits - 1 to the left.
00701     bitset<m + bitset_digit_bits - 1> shifted_p1;
00702     // Start with having it shifted 1 bit to the left.
00703     bitset_digit_t carry = 0;
00704     unsigned int d = 0;
00705     for(bitset_digit_t const* ptr = p1.get_bitset().digits_ptr(); ptr < p1.get_bitset().digits_ptr() + bitset<m>::digits; ++ptr, ++d)
00706     {
00707       shifted_p1.rawdigit(d) = (*ptr << 1) | carry;
00708       carry = *ptr >> (8 * sizeof(bitset_digit_t) - 1);
00709     }
00710     if (d < bitset<m + bitset_digit_bits - 1>::digits)
00711       shifted_p1.rawdigit(d) = carry;
00712     for(bitset_digit_t bitmask = 2;;)
00713     {
00714       for(unsigned int digit = 0; digit < bitset<m>::digits; ++digit)
00715         if ((M_coefficients.digit(digit) & bitmask))
00716         {
00717           for (unsigned int d = 0; d < shifted_p1.digits; ++d)
00718             output[d + digit] ^= shifted_p1.digit(d);
00719         }
00720       bitmask <<= 1;            // Next bit.
00721       if (bitmask == 0)         // Done?
00722         break;
00723       // Shift p1 one bit further to the left.
00724       shifted_p1.template shift_op<1, left, assign>(shifted_p1);
00725     }
00726     // Reduce the resulting output of the multiplication.
00727     reduce(output);
00728     // Copy the reduced output to `result'.
00729     std::memcpy(result.digits_ptr(), output, bitset<m>::digits * sizeof(bitset_digit_t));
00730   }
00731 
#if ECC_DEBUG
/**
 * Debug helper used by polynomial::operator/= to print one of its work variables.
 *
 * print_on() writes the bits from 2 * m - 1 down to a digit-aligned lower bound
 * that is zero or negative, switching to red at bit M_deg and back to the default
 * color after bit M_low.  A '.' is written after bit 0.
 * Bits with a negative index are read from the digits in front of M_p.
 */
template<unsigned int m>
struct div_tct {
  bitset_digit_t const* M_p;    // Pointer to the digit that contains bit 0.
  int M_deg;                    // Index of the bit where highlighting starts.
  int M_low;                    // Index of the bit after which highlighting stops.
  div_tct(bitset<m> const& b, int deg, int low) : M_p(b.digits_ptr()), M_deg(deg), M_low(low) { }
  void print_on(std::ostream& os) const
  {
    // Round M_low down to the start of its digit; never start above bit 0.
    int lowbit = (M_low >> bitset_digit_bits_log2) * bitset_digit_bits;
    if (lowbit > 0)
      lowbit = 0;
    for (int b = 2 * m - 1; b >= lowbit; --b)
    {
      if (b == M_deg)
        os << "\033[31m";
      int digitoffset = (b >> bitset_digit_bits_log2);
      // Build the mask in the digit type: shifting a plain `int' 1 overflows
      // once the bit index reaches the width of int.
      bitset_digit_t mask = static_cast<bitset_digit_t>(1) << (b & (bitset_digit_bits - 1));
      if (M_p[digitoffset] & mask)
        os << '1';
      else
        os << '0';
      if (b == M_low)
        os << "\033[0m";
      if (b == 0)
        os << '.';
    }
  }
};
#endif
00762 
00763 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
00764   polynomial<m, k, k1, k2>&
00765   polynomial<m, k, k1, k2>::operator/=(polynomial const& p)
00766   {
00767 #if ECC_DEBUG
00768     LibEccDout(dc::polynomial|noprefix_cf, "");
00769     LibEccDout(dc::polynomial, "Entering polynomial<" << m << ", " << k << ", " << k1 << ", " << k2 << ">::operator/=()");
00770     polynomial<m, k, k1, k2> x(p.get_bitset());
00771     polynomial<m, k, k1, k2> y(M_coefficients);
00772     LibEccDout(dc::polynomial, "x(t) = " << x);
00773     LibEccDout(dc::polynomial|flush_cf, "y(t) = " << y);
00774 #endif
00775 
00776     // The following algorithm is based on the algorithm
00777     // described in http://research.sun.com/techrep/2001/smli_tr-2001-95.ps
00778     // with significant optimization changes by Carlo Wood.
00779 
00780     // Make sure that there is enough space for a full bitset object
00781     // and align the bitsets on a multiple of bitset_digit_t.
00782     static unsigned int const digit_offset_UV = ((sizeof(bitset<m>) * 8 - 1) / bitset_digit_bits + 1);
00783     static unsigned int const offset_UV = digit_offset_UV * bitset_digit_bits;
00784     // Make room for exponents from at least t^-m till t^2m.
00785     static unsigned int const digit_size_UV = 3 * digit_offset_UV;
00786     // Variables A and B do not need this much space.
00787     static unsigned int const digit_size_AB = bitset<m>::digits;
00788     // One digit of padding, needed for assembly routine.
00789     static unsigned int const padding_digit_size = 1;
00790 
00791     // Declare stack space for four variables.
00792     bitset_digit_t bitpool [5 * padding_digit_size + 2 * digit_size_AB + 2 * digit_size_UV]
00793         __attribute__ ((__aligned__ (32)));
00794     std::memset((char*)bitpool, 0, sizeof(bitpool));
00795 
00796     bitset<m>& A(*(bitset<m>*)&bitpool[padding_digit_size]);
00797     bitset<m>& B(*(bitset<m>*)&bitpool[2 * padding_digit_size + digit_size_AB]);
00798     bitset<m>& U(*(bitset<m>*)&bitpool[3 * padding_digit_size + 2 * digit_size_AB + digit_offset_UV]);
00799     bitset<m>& V(*(bitset<m>*)&bitpool[4 * padding_digit_size + 2 * digit_size_AB + digit_size_UV + digit_offset_UV]);
00800 
00801     // The representation of U and V will be done with bitsets of size `digit_size_UV * bitset_digit_bits'.
00802     // This means that they contain powers of t with a negative exponent.
00803     // That is not a problem as those are well defined: t^(-n) = 1 / t^n.
00804 
00805     // Let rp = M(t) = t^m + t^k [+ t^k1 + t^k2] + 1.
00806 #if ECC_DEBUG
00807     bitset<m + 1> rp("1");
00808     rp.template set<m>();
00809     rp.template set<k>();
00810     if (k1)
00811     {
00812       rp.template set<k1>();
00813       rp.template set<k2>();
00814     }
00815 #endif
00816 
00817     // Let U(t) = y(t) (= M_coefficients).
00818     LibEccDout(dc::polynomial|flush_cf, "U <- y");
00819     U = M_coefficients;
00820 
00821     // Guess the maximum and minimum powers to be the possible limits.
00822     int degU = m - 1;
00823     int lowU = 0;
00824 
00825     // Let A(t) = x(t).
00826     LibEccDout(dc::polynomial|flush_cf, "A <- x");
00827     A = p.get_bitset();
00828 
00829     // Then
00830     //
00831     // A(t) * y(t) = U(t) * x(t)  [mod M(t)].
00832 
00833     // Let V(t) = 0
00834     // Let B = M(t)
00835     //
00836     // Then
00837     //
00838     // B(t) * y(t) = V(t) * x(t)  [mod M(t)].
00839     //
00840     // Let degA be the highest power of t in A.
00841     typename bitset<m>::const_reverse_iterator degA = A.rbegin();
00842     degA.find1();
00843     LibEccDout(dc::polynomial|flush_cf, "deg(A) == " << degA);
00844 
00845     // Let lowA be the lowest power of t in A.
00846     typename bitset<m>::const_iterator lowA = A.begin();
00847     lowA.find1();
00848     LibEccDout(dc::polynomial|flush_cf, "low(A) == " << lowA);
00849 
00850     unsigned int sizeA = degA.get_index() - lowA.get_index();
00851 
00852     // Let n = m - deg(A).
00853     unsigned int n = m - degA.get_index();
00854     //
00855     // Then B'(t) = B(t) - A(t) * t^n will have a degree less than m.
00856     // And
00857     //
00858     // B'(t) * y(t) = B(t) * y(t) - A(t) * y(t) * t^n =
00859     //              = V(t) * x(t) - U(t) * x(t) * t^n =
00860     //              = (V(t) - U(t) * t^n) * x(t) =
00861     //              = V'(t) * x(t)                      [mod M(t)].
00862     //
00863     // B <- B'
00864     LibEccDout(dc::polynomial|flush_cf, "B <- A * t^" << n << " + " << cwprint_using(rp, &bitset<m+1>::base2_print_on));
00865     B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), n);
00866     B.template flip<m>();
00867     B.template flip<k>();
00868     if (k1)
00869     {
00870       B.template flip<k1>();
00871       B.template flip<k2>();
00872     }
00873     B.template flip<0>();
00874 
00875     // Let degB be the highest power of t in B.
00876     typename bitset<m>::const_reverse_iterator degB = B.rbegin();
00877     degB.find1();
00878     LibEccDout(dc::polynomial|flush_cf, "deg(B) == " << degB);
00879 
00880     // Let lowB be the lowest power of t in B.
00881     typename bitset<m>::const_iterator lowB = B.begin();
00882     lowB.find1();
00883     LibEccDout(dc::polynomial|flush_cf, "low(B) == " << lowB);
00884 
00885     // V <- V'
00886     LibEccDout(dc::polynomial|flush_cf, "V <- U * t^" << n <<
00887         "  [mod " << cwprint_using(rp, &bitset<m + 1>::base2_print_on) << "]");
00888     V.xor_with_zero_padded(U, 0, m - 1, n);
00889 
00890     int degV = degU + n;
00891     int lowV = lowU + n;
00892     
00893     unsigned int sizeB = degB.get_index() - lowB.get_index();
00894 
00895     if (sizeA > 0 && sizeB > 0)
00896       for(;;)
00897       {
00898         LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00899         LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00900         LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00901         LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00902         if (sizeA < sizeB)
00903         {
00904           int left_shift = lowB.get_index() - lowA.get_index();
00905           LibEccDout(dc::polynomial|flush_cf, "B <- B + A * t^" << left_shift);
00906           B.xor_with_zero_padded(A, lowA.get_index(), degA.get_index(), left_shift);
00907           degB.find1();
00908           lowB.find1();
00909           sizeB = degB.get_index() - lowB.get_index();
00910           LibEccDout(dc::polynomial|flush_cf, "V <- V + U * t^" << left_shift);
00911           V.xor_with_zero_padded(U, lowU, degU, left_shift);
00912           degV = std::max(degV, degU + left_shift);
00913           lowV = std::min(lowV, lowU + left_shift);
00914           if (sizeB == 0)
00915             break;
00916         }
00917         else
00918         {
00919           int left_shift = lowA.get_index() - lowB.get_index();
00920           LibEccDout(dc::polynomial|flush_cf, "A <- A + B * t^" << left_shift);
00921           A.xor_with_zero_padded(B, lowB.get_index(), degB.get_index(), left_shift);
00922           degA.find1();
00923           lowA.find1();
00924           sizeA = degA.get_index() - lowA.get_index();
00925           LibEccDout(dc::polynomial|flush_cf, "U <- U + V * t^" << left_shift);
00926           U.xor_with_zero_padded(V, lowV, degV, left_shift);
00927           degU = std::max(degU, degV + left_shift);
00928           lowU = std::min(lowU, lowV + left_shift);
00929           if (sizeA == 0)
00930             break;
00931         }
00932       }
00933 
00934     LibEccDout(dc::polynomial|flush_cf, "A = " << cwprint(div_tct<m>(A, degA.get_index(), lowA.get_index())));
00935     LibEccDout(dc::polynomial|flush_cf, "B = " << cwprint(div_tct<m>(B, degB.get_index(), lowB.get_index())));
00936     LibEccDout(dc::polynomial|flush_cf, "U = " << cwprint(div_tct<m>(U, degU, lowU)));
00937     LibEccDout(dc::polynomial|flush_cf, "V = " << cwprint(div_tct<m>(V, degV, lowV)));
00938 
00939     bitset<m>* R;
00940     // 'F' (Floating-point polynomial) will be shifted to the right and
00941     // is therefore defined to run from t^-2m till t^2m.  This means it will
00942     // be shifted OVER the other bitsets, but we don't need those anymore anyway.
00943     static unsigned int const offset_F = 2 * offset_UV;
00944     static unsigned int const size_F = 2 * m + offset_F;
00945     bitset<size_F>* F;
00946     int low1, lowR;
00947 #if ECC_DEBUG
00948     int degR;
00949 #endif
00950     if (sizeA == 0)
00951     {
00952       LibEccDout(dc::polynomial|flush_cf, "R = U");
00953       R = &U;
00954       // tmp to avoid 'warning: type-punning to incomplete type might break strict-aliasing rules'
00955       bitset_digit_t* tmp = &bitpool[3 * padding_digit_size + 2 * digit_size_AB - digit_offset_UV];
00956       F = reinterpret_cast<bitset<size_F>*>(tmp);
00957       low1 = lowA.get_index();
00958       lowR = lowU;
00959 #if ECC_DEBUG
00960       degR = degU;
00961 #endif
00962     }
00963     else // sizeB == 0
00964     {
00965       LibEccDout(dc::polynomial|flush_cf, "R = V");
00966       R = &V;
00967       // tmp to avoid 'warning: type-punning to incomplete type might break strict-aliasing rules'
00968       bitset_digit_t* tmp = &bitpool[4 * padding_digit_size + 2 * digit_size_AB + digit_size_UV - digit_offset_UV];
00969       F = reinterpret_cast<bitset<size_F>*>(tmp);
00970       low1 = lowB.get_index();
00971       lowR = lowV;
00972 #if ECC_DEBUG
00973       degR = degV;
00974 #endif
00975     }
00976 
00977     *F >>= low1;
00978     lowR -= low1;
00979 #if ECC_DEBUG
00980     degR -= low1;
00981 #endif
00982     // Get rid of negative exponents.
00983     LibEccDout(dc::polynomial|flush_cf, "lowR = " << lowR);
00984     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
00985     if ((!k1 && k >= 32) || k2 >= 32)
00986     {
00987       static int const digit_shift_k2 = k2 >> bitset_digit_bits_log2;
00988       static int const bit_shift_k2 = k2 & (bitset_digit_bits  - 1);
00989       static int const digit_shift_k1 = k1 >> bitset_digit_bits_log2;
00990       static int const bit_shift_k1 = k1 & (bitset_digit_bits  - 1);
00991       static int const digit_shift_k = k >> bitset_digit_bits_log2;
00992       static int const bit_shift_k = k & (bitset_digit_bits  - 1);
00993       static int const digit_shift_m = m >> bitset_digit_bits_log2;
00994       static int const bit_shift_m = m & (bitset_digit_bits  - 1);
00995       static int const thirtytwo_minus_bit_shift_k2_with_compile_warning_evasion = (32 - bit_shift_k2) & (bitset_digit_bits  - 1);
00996       static int const thirtytwo_minus_bit_shift_k1_with_compile_warning_evasion = (32 - bit_shift_k1) & (bitset_digit_bits  - 1);
00997       static int const thirtytwo_minus_bit_shift_k_with_compile_warning_evasion = (32 - bit_shift_k) & (bitset_digit_bits  - 1);
00998       static int const thirtytwo_minus_bit_shift_m_with_compile_warning_evasion = (32 - bit_shift_m) & (bitset_digit_bits  - 1);
00999       int first_digit = (lowR + offset_F) >> bitset_digit_bits_log2;
01000       bitset_digit_t* ptr = F->digits_ptr() + first_digit;
01001       bitset_digit_t* ptr1 = R->digits_ptr();
01002       while(ptr < ptr1)
01003       {
01004         if (k1)
01005         {
01006           ptr[digit_shift_k2] ^= (*ptr) << bit_shift_k2;
01007           if (bit_shift_k2 != 0)
01008             ptr[digit_shift_k2 + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_k2_with_compile_warning_evasion;
01009           ptr[digit_shift_k1] ^= (*ptr) << bit_shift_k1;
01010           if (bit_shift_k1 != 0)
01011             ptr[digit_shift_k1 + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_k1_with_compile_warning_evasion;
01012         }
01013         ptr[digit_shift_k] ^= (*ptr) << bit_shift_k;
01014         if (bit_shift_k != 0)
01015           ptr[digit_shift_k + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_k_with_compile_warning_evasion;
01016         ptr[digit_shift_m] ^= (*ptr) << bit_shift_m;
01017         if (bit_shift_m != 0)
01018           ptr[digit_shift_m + 1] ^= (*ptr) >> thirtytwo_minus_bit_shift_m_with_compile_warning_evasion;
01019         ++ptr;
01020       }
01021     }
01022     else
01023     {
01024       for (unsigned int i = lowR + offset_F; i < offset_F; ++i)
01025       {
01026         if (F->test(i))
01027         {
01028 #if ECC_DEBUG
01029           F->flip(i);           // This is not really needed, but prints nicer output below.
01030 #endif
01031           if (k1)
01032           {
01033             F->flip(i + k2);
01034             F->flip(i + k1);
01035           }
01036           F->flip(i + k);
01037           F->flip(i + m);
01038         }
01039       }
01040     }
01041 #if ECC_DEBUG
01042     lowR = 0;
01043     degR = 2 * m - 1;
01044 #endif
01045     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01046     reduce(R->digits_ptr());
01047 #if ECC_DEBUG
01048     degR = m - 1;
01049 #endif
01050     LibEccDout(dc::polynomial|flush_cf, "R = " << cwprint(div_tct<m>(*R, degR, lowR)));
01051     M_coefficients = *R;
01052 
01053     return *this;
01054   }
01055 
01056 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01057   inline polynomial<m, k, k1, k2>&
01058   polynomial<m, k, k1, k2>::operator/=(typename polynomial<m, k, k1, k2>::xor_type const& expr)
01059   {
01060     return (*this /= polynomial<m, k, k1, k2>(expr));
01061   }
01062 
01063 // Solve x^2 + b x = c.
01064 // Assuming that b != 0, there are 2 solutions: x1 and x1 + b.
01065 // This means that during the 'wiping' of the matrix in order
01066 // to solve x, one bit of x will stay undetermined.  We need
01067 // to take special care to make sure that this will be a bit
01068 // for which a bit of 'b' is set, otherwise we'd return a wrong
01069 // value.
01070 //
01071 // If b equals zero, then the solution is the sqrt(c).  Otherwise
01072 // we can divide both sides of the equation by b^2 and solve
01073 // y^2 + y = c/b^2, and set x = b * y.
01074 //
01075 // There will only be a solution to this equation iff 0 = Tr(c/b^2).
01076 // (simply square the equation m-1 times and add them all up).
01077 //
01078 // Note that if y1 is a solution, then so is y1 + 1, hence we
01079 // cannot determine the least significant bit of y.
01080 //
01081 // It is possible to compose a matrix A such that Ax = x^2 + x
01082 // because squaring is an automorphism of the field:
01083 // x is a sum of basis elements, ie x = b1 + b2 + b3 and
01084 // x^2 = b1^2 + b2^2 + b3^2.  Therefore, if there exists a
01085 // matrix S such that Sb_i = b_i^2 for any basis element then
01086 // A = (S + I).  Moreover, such a matrix S must exist because
01087 // there are exactly m basis elements, and a matrix of mxm
01088 // will always be able to satisfy that.
01089 
#if ECC_DEBUG
// Debug function: writes `matrix` to the dc::gaussj debug channel.
//
// matrix    Pointer to the first of m rows.  A row is 2 * m bits wide when
//           LIBECC_AUGMENTED is set and m bits wide otherwise.
// pivotted  One bit per row/column index; set bits are marked in the output.
//
// Output layout:
//  * One header line per power of ten below m.  Columns 1 .. (m + 1) / 2 - 1
//    are marked "+ ", columns whose bit is set in `pivotted` show the
//    corresponding decimal digit of their index, all others are left blank.
//  * One line per row, prefixed with "+ " (rows 1 .. (m + 1) / 2 - 1),
//    "* " (rows set in `pivotted`) or two spaces.
//  * When LIBECC_INPLACE is set and a row is wider than m bits, the marked
//    columns are colored: green when the bit equals the bit at the same
//    position in the other half of the row, red when it differs.
//
// The color sequences are spelled with the standard octal escape \033; the
// previously used \e is a compiler extension that denotes the same byte.
template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
  void polynomial<m, k, k1, k2>::print_matrix(
#if LIBECC_AUGMENTED
       bitset<2 * m> const* matrix,
#else
       bitset<m> const* matrix,
#endif
       bitset<m> const& pivotted)
  {
    // Columns/rows in [1, easy_end) are the ones flagged with a '+'.
    unsigned int const easy_end = (m + 1) / 2;

    // Header: one line per decimal digit position of the column index.
    for (unsigned int n = 1; n < m; n *= 10)
    {
      LibEccDout(dc::gaussj|continued_cf, "  ");
      for (unsigned int bit = 0; bit < matrix->number_of_bits; ++bit)
      {
        unsigned int const col = bit % m;
        if (bit == m)
        {
          // Gap between the two halves of an augmented row.
          LibEccDout(dc::continued, ' ');
        }
        if (col >= 1 && col < easy_end)
        {
          LibEccDout(dc::continued, "+ ");
        }
        else if (pivotted.test(col))
        {
          LibEccDout(dc::continued, ((col / n) % 10) << ' ');
        }
        else
        {
          LibEccDout(dc::continued, "  ");
        }
      }
      LibEccDout(dc::finish, "");
    }

    // Body: one line per matrix row.
    for (unsigned int row = 0; row < m; ++row)
    {
      std::string line;
      if (row >= 1 && row < easy_end)
        line = "+ ";
      else if (pivotted.test(row))
        line = "* ";
      else
        line = "  ";

      for (unsigned int bit = 0; bit < matrix->number_of_bits; ++bit)
      {
        unsigned int const col = bit % m;
        if (bit == m)
          line += ' ';
        bool const isset = matrix[row].test(bit);
        // Only color when there is a second half to compare against.
        bool const need_color = LIBECC_INPLACE && (matrix->number_of_bits > m) &&
            ((col >= 1 && col < easy_end) || pivotted.test(col));
        if (need_color)
        {
          // Same position in the other half of the row.
          unsigned int const corresponding_bit = (bit + m) % (2 * m);
          bool const halves_agree = (isset == matrix[row].test(corresponding_bit));
          line += halves_agree ? "\033[32m" : "\033[31m";
        }
        line += (isset ? '1' : '0');
        if (need_color)
          line += "\033[0m";
        line += ' ';
      }
      LibEccDout(dc::gaussj, line);
    }
    LibEccDout(dc::gaussj|noprefix_cf, "");
  }
#endif
01152 
// Constructs the polynomial x that solves x^2 + b x = c.
//
// b  Coefficient of the linear term.  When b is zero (b.M_coefficients.any()
//    is false) the result is c's coefficients after sqrt() has been applied.
// c  Right-hand side of the equation.
//
// Throws std::domain_error when the trace of c / b^2 equals 1.
//
// The function-local static `matrix` is filled and wiped only by the first
// call that reaches the initialization block (guarded by the static flag
// `matrix_initialized`); later calls reuse it.
// NOTE(review): no synchronization around that first-call initialization is
// visible here -- confirm that concurrent first calls cannot occur.
01153 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01154   polynomial<m, k, k1, k2>::polynomial(polynomial<m, k, k1, k2> const& b, polynomial<m, k, k1, k2> const& c) :
01155       M_coefficients(0)
01156   {
01157     // If b == 0, then x = sqrt(c).
01158     if (!b.M_coefficients.any())
01159     {
01160       M_coefficients = c.M_coefficients;
01161       sqrt();
01162       return;
01163     }
01164 
01165     // Calculate c/b^2.
          // NOTE(review): square() returns a reference; presumably the squared value lives in b2buf -- confirm.
01166     bitset_digit_t b2buf[square_digits];
01167     polynomial<m, k, k1, k2>& b2 = b.square(b2buf);
01168     polynomial<m, k, k1, k2> cdb2(c);
01169     cdb2 /= b2;
01170     if (cdb2.trace() == 1)
01171       throw std::domain_error("x^2 + bx = c has no solution");
01172 
          // Row type of the matrix: 2 * m bits when the identity is kept next to A, m bits otherwise.
01173 #if LIBECC_AUGMENTED
01174     typedef bitset<2 * m> matrixrow_type;
01175 #else
01176     typedef bitset<m> matrixrow_type;
01177 #endif
01178     static matrixrow_type matrix[m];            // A mx2m or mxm matrix.
01179     static bool matrix_initialized;
01180     if (!matrix_initialized)
01181     {
01182       std::memset(matrix, 0, sizeof(matrix));
01183       // Fill this matrix with either the augmented matrix (A|I) or with just A,
01184       // where A is the matrix such that Ax = x^2 + x.
01185       for (unsigned int bit = 0; bit < m; ++bit)
01186       {
01187         matrix[bit].set(bit);           // The I of A = (S + I).
01188 #if LIBECC_AUGMENTED
01189         matrix[bit].set(bit + m);               // The I of (A|I).
01190 #endif
01191       }
01192       for (unsigned int bit = 0; bit < (m + 1) / 2; ++bit)
01193         matrix[2 * bit].flip(bit);      // The square of low exponents.
01194       for (unsigned int bit = (m + 1) / 2; bit < m; ++bit)
01195         matrix[2 * bit - m].set(bit);   // Reduction with m.
01196       for (unsigned int bit = (m + 1) / 2; bit < m - k / 2; ++bit)
01197         matrix[2 * bit - m + k].flip(bit);      // Reduction with m - k.
01198       if (k1)
01199       {
01200         for (unsigned int bit = (m + 1) / 2; bit < m - k1 / 2; ++bit)
01201           matrix[2 * bit - m + k1].flip(bit);   // Reduction with m - k1.
01202         for (unsigned int bit = (m + 1) / 2; bit < m - k2 / 2; ++bit)
01203           matrix[2 * bit - m + k2].flip(bit);   // Reduction with m - k2.
01204       }
            // Exponents from m - k / 2 upward: four more flips per bit (two when k1 is zero).
01205       for (unsigned int bit = m - k / 2; bit < m; ++bit)
01206       {
01207         matrix[2 * bit - m + k - m].flip(bit);
01208         matrix[2 * bit - m + k - m + k].flip(bit);
01209         if (k1)
01210         {
01211           matrix[2 * bit - m + k - m + k1].flip(bit);
01212           matrix[2 * bit - m + k - m + k2].flip(bit);
01213         }
01214       }
01215       if (k1)
01216       {
01217         for (unsigned int bit = m - k1 / 2; bit < m; ++bit)
01218         {
01219           matrix[2 * bit - m + k1 - m].flip(bit);
01220           matrix[2 * bit - m + k1 - m + k].flip(bit);
01221           matrix[2 * bit - m + k1 - m + k1].flip(bit);
01222           matrix[2 * bit - m + k1 - m + k2].flip(bit);
01223         }
01224         for (unsigned int bit = m - k2 / 2; bit < m; ++bit)
01225         {
01226           matrix[2 * bit - m + k2 - m].flip(bit);
01227           matrix[2 * bit - m + k2 - m + k].flip(bit);
01228           matrix[2 * bit - m + k2 - m + k1].flip(bit);
01229           matrix[2 * bit - m + k2 - m + k2].flip(bit);
01230         }
01231       }
01232 
            // Bookkeeping of the rows that have been used as pivot so far.
01233       bitset<m> pivotted;
01234       pivotted.reset();
01235 
01236       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01237 
01238       // Next, wipe it, so that the left half becomes I.
01239       // The first half is easy.
01240       for (unsigned int wipecol = 1; wipecol < (m + 1) / 2; ++wipecol)
01241       {
01242         matrix[2 * wipecol] ^= matrix[wipecol];
01243 #if LIBECC_INPLACE
01244         matrix[2 * wipecol].set(wipecol);               // Store the inverse in-place, destroying the original.
01245 #endif
01246       }
01247 
01248       // The second half is not.  Use Gauss-Jordan here.
01249       // Note that pivotting is hardly necessary because our arithmetic is infinitely accurate,
01250       // but we still need to find a '1' when we encounter a '0' on the main diagonal of course.
01251       // There will always be at least one '1' in every column, so that partial pivotting suffices
01252       // (speeding up things obviously), with the exception of the case where that '1' is only
01253       // found in row 0 (which is our 'singular' row and needs some special attention).
01254       // The row swapping is done because it is needed if we want to do our work "in-place",
01255       // reducing the amount of memory needed with a factor of two.
01256 
01257       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01258 
            // rowswaps[i] records which row was used as pivot for (column count) i;
            // colswaps and colswaps_inverse start out as the identity permutation.
01259       unsigned int rowswaps[m];
01260       rowswaps[0] = 0;
01261       unsigned int colswaps[m], colswaps_inverse[m];
01262       for (unsigned int row = 0; row < m; ++row)
01263       {
01264         colswaps[row] = row;
01265         colswaps_inverse[row] = row;
01266       }
01267 
01268       // Run over all remaining columns and wipe them, immediately replacing them with the result
01269       // since once a column is wiped we don't need its contents anymore.  Moreover, while wiping
01270       // the column it is optionally swapped with another column at the same time.  This, of course,
01271       // is only done to make the code not understandable anymore for you.
01272 #if LIBECC_SWAPCOLUMNS
01273       for (unsigned int colcnt = (m + 1) / 2; colcnt < m; ++colcnt)
01274 #else
01275       for (unsigned int wipecol = (m + 1) / 2; wipecol < m; ++wipecol)
01276 #endif
01277       {
01278 #if LIBECC_SWAPCOLUMNS
01279         // Find the next row that wasn't already wiped.
01280         unsigned int wipecol = colswaps[colcnt];
01281 #if ECC_DEBUG
01282         LibEccDout(dc::gaussj, "colcnt = " << colcnt);
01283         for (unsigned int row = 0; row < m; ++row)
01284         {
01285           LibEccDout(dc::gaussj, "colswaps[" << row << "] = " << colswaps[row] << "\t\tcolswaps_inverse[" << row << "] = " << colswaps_inverse[row]);
01286           assert(colswaps[colswaps_inverse[row]] == row);
01287           assert(colswaps_inverse[colswaps[row]] == row);
01288         }
01289         LibEccDout(dc::polynomial|noprefix_cf, "");
01290 #endif
01291 #endif
01292 
01293         // First find a suitable row to wipe with.
01294         // This searching is called 'pivotting'.
01295         LibEccDout(dc::gaussj, "Searching for suitable row to wipe with in column " << wipecol);
01296         unsigned int pivotrow;
01297         if (!matrix[wipecol].test(wipecol) || pivotted.test(wipecol))
01298         {
                // Search order: rows below wipecol, then row 0, then rows (m + 1) / 2 .. wipecol - 1.
01299           for (pivotrow = wipecol;;)
01300           {
01301             if (++pivotrow == m)
01302             {
01303               if (matrix[0].test(wipecol) && !pivotted.template test<0>())
01304                 pivotrow = 0;
01305               else
01306               {
01307                 for (pivotrow = (m + 1) / 2; pivotrow < wipecol; ++pivotrow)
01308                   if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01309                     break;
01310               }
01311               if (pivotrow == wipecol)
01312               {
01313                 // This happens when we swapped with column 0 (which is all zeroes), for example when m == 14.
01314                 // Just ignore this column.
01315                 pivotrow = m;                   // Flag that we need to continue the main loop.
01316                 pivotted.set(wipecol);
01317                 matrix[wipecol].set(wipecol);   // Copy identity matrix over.
01318                 break;
01319               }
01320             }
01321             if (matrix[pivotrow].test(wipecol) && !pivotted.test(pivotrow))
01322               break;
01323           }
01324           if (pivotrow == m)
01325             continue;
01326         }
01327         else
01328           pivotrow = wipecol;
01329         LibEccDout(dc::gaussj, "Using row " << pivotrow << " to wipe column " << wipecol);
01330         LibEccDout(dc::gaussj, "Before:");
01331         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01332         pivotted.set(pivotrow);
01333 #if LIBECC_SWAPCOLUMNS
01334         rowswaps[colcnt] = pivotrow;
01335         LibEccDout(dc::gaussj, "Setting rowswaps[" << colcnt << "] to " << pivotrow);
01336 #else
01337         rowswaps[wipecol] = pivotrow;                   // We temporarily use row 'pivotrow' to store row 'wipecol'.
01338         LibEccDout(dc::gaussj, "Setting rowswaps[" << wipecol << "] to " << pivotrow);
01339 #endif
01340         if (pivotrow == wipecol)
01341         {
01342 #if LIBECC_INPLACE
01343           matrix[pivotrow].set(wipecol);                // Store the inverse in-place, destroying the original.
01344 #endif
01345           for (unsigned int row = 0; row < m; ++row)
01346           {
01347             if (row == pivotrow)
01348               continue;
01349             if (matrix[row].test(wipecol))
01350             {
01351 #if LIBECC_INPLACE
01352               matrix[row].clear(wipecol);               // Store the inverse in-place, destroying the original.
01353 #endif
01354               matrix[row] ^= matrix[pivotrow]; 
01355             }
01356           }
01357         }
01358         else
01359         {
01360           // This block contains the main magic.  It's hard to understand I am afraid.
01361           // Basically this does the same as the code block above, but at the same time
01362           // swaps the columns 'wipecol' and 'pivotrow'.
01363 
01364 #if LIBECC_SWAPCOLUMNS
01365           // Swap pivot row bits, and set the bit in pivotrow (that's the identity matrix bit).
01366           if (matrix[pivotrow].test(pivotrow) != matrix[pivotrow].test(wipecol))
01367           {
01368             matrix[pivotrow].flip(wipecol);
01369 #if !LIBECC_INPLACE
01370             matrix[pivotrow].flip(pivotrow);    // No need to flip the 'pivotrow' column when we set it in the next line.
01371 #endif
01372           }
01373 #endif
01374 #if LIBECC_INPLACE
01375           matrix[pivotrow].set(pivotrow);               // Store the inverse in-place, destroying the original.
01376 #endif
01377           for (unsigned int row = 0; row < m; ++row)
01378           {
01379             if (row == pivotrow)                        // Don't wipe the row that we use to wipe.
01380               continue;
01381             matrixrow_type& mrow = matrix[row];
01382             if (mrow.test(wipecol))
01383             {
01384 #if LIBECC_SWAPCOLUMNS
01385               if (!mrow.test(pivotrow))         // If the value in the two columns differ,
01386               {
01387                 mrow.clear(wipecol);            // swap the two values.
01388 #if !LIBECC_INPLACE
01389                 mrow.set(pivotrow);             // No need to set pivotrow when it is overwritten in the next line.
01390 #endif
01391               }
01392 #endif
01393 #if LIBECC_INPLACE
01394               mrow.clear(pivotrow);             // Store the inverse in-place, destroying the original.
01395                                                 // This represents a 0 from the identity matrix.
01396 #endif
01397               mrow ^= matrix[pivotrow];         // Ok, now the columns have been swapped and to-be-wiped column
01398                                                 // has been replaced with a clean identity matrix bit. Perform
01399                                                 // the actual wiping.
01400             }
01401 #if LIBECC_SWAPCOLUMNS
01402             else if (mrow.test(pivotrow))       // Are the pivotrow and wipecol different?
01403             {
01404               mrow.set(wipecol);                // Then flip both, exchanging them effectively. If they
01405               mrow.clear(pivotrow);             //   were the same, consider them exchanged anyway.
01406             }
01407 #endif
01408           }
01409 #if LIBECC_SWAPCOLUMNS
01410           LibEccDout(dc::gaussj, "Also swapped columns " << pivotrow << " and " << wipecol);
01411           // Keep colswaps up to date.  We need colswaps_inverse to do that, therefore
01412           // we need to keep colswaps_inverse up to date too.
01413           std::swap(colswaps[colswaps_inverse[wipecol]], colswaps[colswaps_inverse[pivotrow]]);
01414           std::swap(colswaps_inverse[wipecol], colswaps_inverse[pivotrow]);
01415 #endif
01416         }
01417         LibEccDout(dc::gaussj, "After:");
01418         LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01419       }
01420 
01421 #if ECC_DEBUG
01422       for (unsigned int i = 0; i < m; ++i)
01423       {
01424         if (rowswaps[i] != i)
01425           LibEccDout(dc::gaussj, i << " : " << rowswaps[i]);
01426         // Skip the first half of the matrix.
01427         if (i == 0)
01428           i = (m + 1) / 2 - 1;
01429       }
01430       LibEccDout(dc::gaussj|noprefix_cf, "");
01431 #endif
01432 
            // Row 0 was used as a pivot: take the first row from (m + 1) / 2 upward that is not
            // yet marked in `pivotted`, record it as rowswaps[0] and mark it.
01433       if (pivotted.test(0))
01434       {
01435         int row0 = (m + 1) / 2;
01436         while (pivotted.test(row0))
01437           ++row0;
01438         rowswaps[0] = row0;
01439         pivotted.set(row0);
01440       }
01441 
01442       // Next perform some row rotations, in order to get all rows on their correct places again.
01443       for (unsigned int i = 0; i < m; ++i)
01444       {
01445         if (rowswaps[i] != i)
01446         {
01447           unsigned int j = i;
                // NOTE(review): temp is always bitset<2 * m>, whereas the rows are matrixrow_type
                // (bitset<m> unless LIBECC_AUGMENTED) -- presumably bitset converts between sizes; confirm.
01448           bitset<2 * m> temp = matrix[j];
01449           LibEccDout(dc::gaussj|continued_cf, j);
                // Follow the cycle that starts at i, moving every row into its place.
01450           do
01451           {
01452             matrix[j] = matrix[rowswaps[j]];
01453             LibEccDout(dc::continued, " <-- " << rowswaps[j]);
01454             j = rowswaps[j];
01455           }
01456           while (rowswaps[j] != i);
01457           matrix[j] = temp;
01458           LibEccDout(dc::finish, " <-- " << i);
01459           j = i;
01460           do
01461           {
01462             int pj = j;
01463             j = rowswaps[pj];
01464             // Update the administration so that we won't try to rotate them again.
01465             rowswaps[pj] = pj;
01466           }
01467           while (j != i);
01468         }
01469         // Skip the first half of the matrix.
01470         if (i == 0)
01471           i = (m + 1) / 2 - 1;
01472       }
01473 
01474       LibEccDebug(if (dc::gaussj.is_on()) print_matrix(matrix, pivotted));
01475       matrix_initialized = true;
01476     }
01477 
01478     // Multiply the matrix with cdb2.
          // Bit `row` of M_coefficients is set when the AND of that matrix row with cdb2's bits reports odd().
01479     for (unsigned int row = 0; row < m; ++row)
01480     {
01481 #if LIBECC_AUGMENTED
01482 #if LIBECC_INPLACE
01483       bitset<m> tmp = matrix[row];
01484 #else
01485       bitset<2 * m> tmp2;
01486       matrix[row].template shift_op<m, right, assign>(tmp2);
01487       bitset<m> tmp = tmp2;
01488 #endif
01489       tmp &= cdb2.get_bitset();
01490 #else
01491       bitset<m> tmp = matrix[row] & cdb2.get_bitset();
01492 #endif
01493       if (tmp.odd())
01494         M_coefficients.set(row);
01495     }
01496 
01497     // Finally, multiply with b to get x.
01498     *this *= b;
01499   }
01500 
01501 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01502   inline bool
01503   operator==(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01504   {
01505     return p1.M_coefficients == p2.M_coefficients;
01506   }
01507 
01508 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01509   inline bool
01510   operator==(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01511   {
01512     return polynomial<m, k, k1, k2>(expr) == p2;
01513   }
01514 
01515 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01516   inline bool
01517   operator==(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01518   {
01519     return p1 == polynomial<m, k, k1, k2>(expr);
01520   }
01521 
01522 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01523   inline bool
01524   operator!=(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01525   {
01526     return p1.M_coefficients != p2.M_coefficients;
01527   }
01528 
01529 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01530   inline bool
01531   operator!=(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01532   {
01533     return polynomial<m, k, k1, k2>(expr) != p2;
01534   }
01535 
01536 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01537   inline bool
01538   operator!=(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01539   {
01540     return p1 != polynomial<m, k, k1, k2>(expr);
01541   }
01542 
01543 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01544   inline typename polynomial<m, k, k1, k2>::xor_type
01545   operator+(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01546   {
01547     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01548   }
01549 
01550 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01551   inline typename polynomial<m, k, k1, k2>::xor_type
01552   operator-(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01553   {
01554     return typename polynomial<m, k, k1, k2>::xor_type(p1.M_coefficients, p2.M_coefficients);
01555   }
01556 
01557 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01558   inline polynomial<m, k, k1, k2>
01559   operator*(polynomial<m, k, k1, k2> const& p1, polynomial<m, k, k1, k2> const& p2)
01560   {
01561     polynomial<m, k, k1, k2> result;
01562     p1.multiply_with(p2, result.M_coefficients);
01563     return result;
01564   }
01565 
01566 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01567   inline polynomial<m, k, k1, k2>
01568   operator*(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01569   {
01570     return polynomial<m, k, k1, k2>(expr) * p2;
01571   }
01572 
01573 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01574   inline polynomial<m, k, k1, k2>
01575   operator*(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01576   {
01577     return p1 * polynomial<m, k, k1, k2>(expr);
01578   }
01579 
01580 
01581 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01582   inline polynomial<m, k, k1, k2>
01583   operator/(polynomial<m, k, k1, k2> const& e1, polynomial<m, k, k1, k2> const& e2)
01584   {
01585     polynomial<m, k, k1, k2> tmp(e1);
01586     tmp /= e2;
01587     return tmp;
01588   }
01589 
01590 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01591   inline polynomial<m, k, k1, k2>
01592   operator/(typename polynomial<m, k, k1, k2>::xor_type const& expr, polynomial<m, k, k1, k2> const& p2)
01593   {
01594     return polynomial<m, k, k1, k2>(expr) / p2;
01595   }
01596 
01597 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01598   inline polynomial<m, k, k1, k2>
01599   operator/(polynomial<m, k, k1, k2> const& p1, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01600   {
01601     return p1 / polynomial<m, k, k1, k2>(expr);
01602   }
01603 
01604 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01605   std::ostream& operator<<(std::ostream& os, polynomial<m, k, k1, k2> const& p)
01606   {
01607     p.M_coefficients.base2_print_on(os);
01608     return os;
01609   }
01610 
01611 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01612   std::ostream& operator<<(std::ostream& os, typename polynomial<m, k, k1, k2>::xor_type const& expr)
01613   {
01614     polynomial<m, k, k1, k2> p(expr);
01615     p.M_coefficients.base2_print_on(os);
01616     return os;
01617   }
01618 
// Definition of the static flag that calculate_normal() sets to true after it has filled S_normal.
01619 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01620   bool polynomial<m, k, k1, k2>::S_normal_initialized;
01621 
// Definition of the static bitset S_normal; calculate_normal() sets bits in it.
01622 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01623   bitset<m> polynomial<m, k, k1, k2>::S_normal;
01624  
01625 template<unsigned int m, unsigned int k, unsigned int k1, unsigned int k2>
01626   void polynomial<m, k, k1, k2>::calculate_normal(void)
01627   {
01628 #if 0
01629     bitset<m> single_bit(1);
01630     polynomial trace;
01631     bitset_digit_t nextfrob1_buf[square_digits];
01632     bitset_digit_t nextfrob2_buf[square_digits];
01633     polynomial* nextfrob1;
01634     polynomial* nextfrob2;
01635     for (int bit = 0; bit < m; ++bit)
01636     {
01637       trace = single_bit;
01638       nextfrob1 = &trace.square(nextfrob1_buf);
01639       for (int i = 0; i < (m - 1) / 2; ++i)
01640       {
01641         nextfrob2 = &nextfrob1->square(nextfrob2_buf);
01642         trace += *nextfrob1 + *nextfrob2;
01643         if ((m & 1) && i == (m - 3) / 2)
01644           break;
01645         nextfrob1 = &nextfrob2->square(nextfrob1_buf);
01646       }
01647       if (!(m & 1))
01648         trace += *nextfrob1;
01649       if (trace.get_bitset().template test<0>())
01650         S_normal.set(bit);
01651       single_bit.template shift_op<1, libecc::left, libecc::assign>(single_bit);
01652     }
01653 #else
01654     // We can do that faster... I didn't prove this yet, but it works.
01655     if ((m & 1))
01656       S_normal.template set<0>();
01657     if (((m - k) & 1))
01658       S_normal.template set<m - k>();
01659     if (k1)
01660     {
01661       if (((m - k1) & 1))
01662         S_normal.template set<m - k1>();
01663       if (((m - k2) & 1))
01664         S_normal.template set<m - k2>();
01665     }
01666 #endif
01667     S_normal_initialized = true;
01668   }
01669 
01670 } // namespace libecc
01671 
01672 #include <libecc/square.hcc>    // File with different copyright.
01673 
01674 #endif // LIBECC_POLYNOMIAL_H