// Copyright (C) 2003, International Business Machines // Corporation and others. All Rights Reserved. #include "CoinPragma.hpp" #include "CoinHelperFunctions.hpp" #include "ClpInterior.hpp" #include "ClpCholeskyDense.hpp" #include "ClpMessage.hpp" #include "ClpQuadraticObjective.hpp" //############################################################################# // Constructors / Destructor / Assignment //############################################################################# //------------------------------------------------------------------- // Default Constructor //------------------------------------------------------------------- ClpCholeskyDense::ClpCholeskyDense () : ClpCholeskyBase(), borrowSpace_(false) { type_=11;; } //------------------------------------------------------------------- // Copy constructor //------------------------------------------------------------------- ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs) : ClpCholeskyBase(rhs), borrowSpace_(rhs.borrowSpace_) { assert(!rhs.borrowSpace_||!rhs.sizeFactor_); // can't do if borrowing space } //------------------------------------------------------------------- // Destructor //------------------------------------------------------------------- ClpCholeskyDense::~ClpCholeskyDense () { if (borrowSpace_) { // set NULL sparseFactor_=NULL; workDouble_=NULL; diagonal_=NULL; } } //---------------------------------------------------------------- // Assignment operator //------------------------------------------------------------------- ClpCholeskyDense & ClpCholeskyDense::operator=(const ClpCholeskyDense& rhs) { if (this != &rhs) { assert(!rhs.borrowSpace_||!rhs.sizeFactor_); // can't do if borrowing space ClpCholeskyBase::operator=(rhs); borrowSpace_=rhs.borrowSpace_; } return *this; } //------------------------------------------------------------------- // Clone //------------------------------------------------------------------- ClpCholeskyBase * ClpCholeskyDense::clone() const { return new ClpCholeskyDense(*this); } // If not power of 2 then need to redo a bit #define BLOCK 16 #define BLOCKSHIFT 4 #define BLOCKSQ ( BLOCK*BLOCK ) #define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT ) #define number_blocks(x) (((x)+BLOCK-1)>>BLOCKSHIFT) #define number_rows(x) ((x)<>BLOCKSHIFT; // allow one stripe extra numberBlocks = numberBlocks + ((numberBlocks*(numberBlocks+1))/2); sizeFactor_=numberBlocks*BLOCKSQ; //#define CHOL_COMPARE #ifdef CHOL_COMPARE sizeFactor_ += 95000; #endif if (!factor) { sparseFactor_ = new longDouble [sizeFactor_]; rowsDropped_ = new char [numberRows_]; memset(rowsDropped_,0,numberRows_); workDouble_ = new longDouble[numberRows_]; diagonal_ = new longDouble[numberRows_]; } else { borrowSpace_=true; int numberFull = factor->numberRows(); sparseFactor_ = factor->sparseFactor()+(factor->size()-sizeFactor_); workDouble_ = factor->workDouble() + (numberFull-numberRows_); diagonal_ = factor->diagonal() + (numberFull-numberRows_); } numberRowsDropped_=0; return 0; } /* Returns space needed */ CoinBigIndex ClpCholeskyDense::space( int numberRows) const { int numberBlocks = (numberRows+BLOCK-1)>>BLOCKSHIFT; // allow one stripe extra numberBlocks = numberBlocks + ((numberBlocks*(numberBlocks+1))/2); CoinBigIndex sizeFactor=numberBlocks*BLOCKSQ; #ifdef CHOL_COMPARE sizeFactor += 95000; #endif return sizeFactor; } /* Orders rows and saves pointer to matrix.and model */ int ClpCholeskyDense::order(ClpInterior * model) { model_=model; int numberRows; int numberRowsModel = model_->numberRows(); int numberColumns = model_->numberColumns(); if (!doKKT_) { numberRows = numberRowsModel; } else { numberRows = 2*numberRowsModel+numberColumns; } reserveSpace(NULL,numberRows); rowCopy_ = model->clpMatrix()->reverseOrderedCopy(); return 0; } /* Does Symbolic factorization given permutation. This is called immediately after order. If user provides this then user must provide factorize and solve. Otherwise the default factorization is used returns non-zero if not enough memory */ int ClpCholeskyDense::symbolic() { return 0; } /* Factorize - filling in rowsDropped and returning number dropped */ int ClpCholeskyDense::factorize(const double * diagonal, int * rowsDropped) { assert (!borrowSpace_); const CoinBigIndex * columnStart = model_->clpMatrix()->getVectorStarts(); const int * columnLength = model_->clpMatrix()->getVectorLengths(); const int * row = model_->clpMatrix()->getIndices(); const double * element = model_->clpMatrix()->getElements(); const CoinBigIndex * rowStart = rowCopy_->getVectorStarts(); const int * rowLength = rowCopy_->getVectorLengths(); const int * column = rowCopy_->getIndices(); const double * elementByRow = rowCopy_->getElements(); int numberColumns=model_->clpMatrix()->getNumCols(); CoinZeroN(sparseFactor_,sizeFactor_); //perturbation longDouble perturbation=model_->diagonalPerturbation()*model_->diagonalNorm(); perturbation=perturbation*perturbation; if (perturbation>1.0) { #ifdef COIN_DEVELOP //if (model_->model()->logLevel()&4) std::cout <<"large perturbation "<delta(); // add delta*delta to diagonal delta2 *= delta2; if (!doKKT_) { longDouble * work = sparseFactor_; work--; // skip diagonal int addOffset=numberRows_-1; const double * diagonalSlack = diagonal + numberColumns; // largest in initial matrix double largest2=1.0e-20; for (iRow=0;iRowiRow) { longDouble value=element[j]*multiplier; work[jRow] += value; } else if (jRow==iRow) { longDouble value=element[j]*multiplier; diagonalValue += value; } } } } for (int j=iRow+1;jlargest2) { diagonal_[iRow]=diagonal+perturbation; } else { diagonal_[iRow]=diagonal+perturbation; rowsDropped[iRow]=2; numberDroppedBefore++; } } } doubleParameters_[10]=CoinMax(1.0e-20,largest); integerParameters_[20]=0; doubleParameters_[3]=0.0; doubleParameters_[4]=COIN_DBL_MAX; integerParameters_[34]=0; // say all must be positive #ifdef CHOL_COMPARE if (numberRows_<200) factorizePart3(rowsDropped); #endif factorizePart2(rowsDropped); newDropped=integerParameters_[20]+numberDroppedBefore; largest=doubleParameters_[3]; smallest=doubleParameters_[4]; if (model_->messageHandler()->logLevel()>1) std::cout<<"Cholesky - largest "<(model_->objectiveAsObject())); if (quadraticObj) quadratic = quadraticObj->quadraticObjective(); // matrix int numberRowsModel = model_->numberRows(); int numberColumns = model_->numberColumns(); int numberTotal = numberColumns + numberRowsModel; longDouble * work = sparseFactor_; work--; // skip diagonal int addOffset=numberRows_-1; int iColumn; if (!quadratic) { for (iColumn=0;iColumn1.0e-100) { value = 1.0/value; largest = CoinMax(largest,fabs(value)); diagonal_[iColumn] = -value; CoinBigIndex start=columnStart[iColumn]; CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn]; for (CoinBigIndex j=start;jgetIndices(); const CoinBigIndex * columnQuadraticStart = quadratic->getVectorStarts(); const int * columnQuadraticLength = quadratic->getVectorLengths(); const double * quadraticElement = quadratic->getElements(); for (iColumn=0;iColumn1.0e-100) { value = 1.0/value; for (j=columnQuadraticStart[iColumn]; jiColumn) { work[jColumn]=-quadraticElement[j]; } else if (iColumn==jColumn) { value += quadraticElement[j]; } } largest = CoinMax(largest,fabs(value)); diagonal_[iColumn] = -value; CoinBigIndex start=columnStart[iColumn]; CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn]; for (j=start;j1.0e-100) { value = 1.0/value; largest = CoinMax(largest,fabs(value)); } else { value = 1.0e100; } diagonal_[iColumn] = -value; work[iColumn-numberColumns+numberTotal]=-1.0; addOffset--; work += addOffset; } // Finish diagonal for (iRow=0;iRowmessageHandler()->logLevel()>1) std::cout<<"Cholesky - largest "<primalR(); double * dualR = model_->dualR(); for (iRow=0;iRow=dropValue) { smallest = CoinMin(smallest,diagonalValue); largest = CoinMax(largest,diagonalValue); workDouble_[iColumn]=diagonalValue; diagonalValue = 1.0/diagonalValue; } else { dropColumn=true; workDouble_[iColumn]=1.0e100; diagonalValue=0.0; integerParameters_[20]++; } } if (!dropColumn) { diagonal_[iColumn]=diagonalValue; for (iRow=iColumn+1;iRow>BLOCKSHIFT; longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks; int k = array-a; assert ((k%BLOCKSQ)==0); iCol=0; int kk=k>>BLOCKSQSHIFT; //printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT); k=kk; while (k>=numberBlocks) { iCol++; k -= numberBlocks; numberBlocks--; } iRow = iCol+k; counter++; if(counter>100000) exit(77); return kk; } #endif /* Factorize - filling in rowsDropped and returning number dropped */ void ClpCholeskyDense::factorizePart2( int * rowsDropped) { int iColumn; //longDouble * xx = sparseFactor_; //longDouble * yy = diagonal_; //diagonal_ = sparseFactor_ + 40000; //sparseFactor_=diagonal_ + numberRows_; //memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double)); //memcpy(sparseFactor_,xx,40000*sizeof(double)); //memcpy(diagonal_,yy,numberRows_*sizeof(double)); int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT; // later align on boundary longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks; int n=numberRows_; int nRound = numberRows_&(~(BLOCK-1)); // adjust if exact if (nRound==n) nRound -= BLOCK; int sizeLastBlock=n-nRound; int get = n*(n-1)/2; // ? as no diagonal int block = numberBlocks*(numberBlocks+1)/2; int ifOdd; int rowLast; if (sizeLastBlock!=BLOCK) { longDouble * aa = &a[(block-1)*BLOCKSQ]; rowLast=nRound-1; ifOdd=1; int put = BLOCKSQ; // do last separately put -= (BLOCK-sizeLastBlock)*(BLOCK+1); for (iColumn=numberRows_-1;iColumn>=nRound;iColumn--) { int put2 = put; put -= BLOCK; for (int iRow=numberRows_-1;iRow>iColumn;iRow--) { aa[--put2] = sparseFactor_[--get]; assert (aa+put2>=sparseFactor_+get); } // save diagonal as well aa[--put2]=diagonal_[iColumn]; } n=nRound; block--; } else { // exact fit rowLast=numberRows_-1; ifOdd=0; } // Now main loop int nBlock=0; for (;n>0;n-=BLOCK) { longDouble * aa = &a[(block-1)*BLOCKSQ]; longDouble * aaLast=NULL; int put = BLOCKSQ; int putLast=0; // see if we have small block if (ifOdd) { aaLast = &a[(block-1)*BLOCKSQ]; aa = aaLast-BLOCKSQ; putLast = BLOCKSQ-BLOCK+sizeLastBlock; } for (iColumn=n-1;iColumn>=n-BLOCK;iColumn--) { if (aaLast) { // last bit for (int iRow=numberRows_-1;iRow>rowLast;iRow--) { aaLast[--putLast] = sparseFactor_[--get]; assert (aaLast+putLast>=sparseFactor_+get); } putLast -= BLOCK-sizeLastBlock; } longDouble * aPut = aa; int j=rowLast; for (int jBlock=0;jBlock<=nBlock;jBlock++) { int put2 = put; int last = CoinMax(j-BLOCK,iColumn); for (int iRow=j;iRow>last;iRow--) { aPut[--put2] = sparseFactor_[--get]; assert (aPut+put2>=sparseFactor_+get); } if (j-BLOCK>1); int nThis=number_rows(nb); longDouble * aother; int nLeft=n-nThis; int nintri=(nb*(nb+1))>>1; int nbelow=(numberBlocks-nb)*nb; factor(a,nThis,numberBlocks,diagonal,work,rowsDropped); triRec(a,nThis,a+number_entries(nb),diagonal,work,nLeft,nb,0,numberBlocks); aother=a+number_entries(nintri+nbelow); recTri(a+number_entries(nb),nLeft,nThis,nb,0,aother,diagonal,work,numberBlocks); factor(aother,nLeft, numberBlocks-nb,diagonal+nThis,work+nThis,rowsDropped); } } // Non leaf recursive triangle rectangle update void ClpCholeskyDense::triRec(longDouble * aTri, int nThis, longDouble * aUnder, longDouble * diagonal, longDouble * work, int nLeft, int iBlock, int jBlock, int numberBlocks) { if (nThis<=BLOCK&&nLeft<=BLOCK) { triRecLeaf(aTri,aUnder,diagonal,work,nLeft); } else if (nThis>1); int nLeft2=number_rows(nb); triRec(aTri,nThis,aUnder,diagonal,work,nLeft2,iBlock,jBlock,numberBlocks); triRec(aTri,nThis,aUnder+number_entries(nb),diagonal,work,nLeft-nLeft2, iBlock+nb,jBlock,numberBlocks); } else { int nb=number_blocks((nThis+1)>>1); int nThis2=number_rows(nb); longDouble * aother; int kBlock=jBlock+nb; int i; int nintri=(nb*(nb+1))>>1; int nbelow=(numberBlocks-nb)*nb; triRec(aTri,nThis2,aUnder,diagonal,work,nLeft,iBlock,jBlock,numberBlocks); /* and rectangular update */ i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)- (numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1; aother=aUnder+number_entries(i); recRec(aTri+number_entries(nb),nThis-nThis2,nLeft,nThis2,aUnder,aother, diagonal,work,iBlock,kBlock,jBlock,numberBlocks); triRec(aTri+number_entries(nintri+nbelow),nThis-nThis2,aother,diagonal+nThis2, work+nThis2,nLeft, iBlock-nb,kBlock-nb,numberBlocks-nb); } } // Non leaf recursive rectangle triangle update void ClpCholeskyDense::recTri(longDouble * aUnder, int nTri, int nDo, int iBlock, int jBlock,longDouble * aTri, longDouble * diagonal, longDouble * work, int numberBlocks) { if (nTri<=BLOCK&&nDo<=BLOCK) { recTriLeaf(aUnder,aTri,diagonal,work,nTri); } else if (nTri>1); int nDo2=number_rows(nb); longDouble * aother; int i; recTri(aUnder,nTri,nDo2,iBlock,jBlock,aTri,diagonal,work,numberBlocks); i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)- (numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1; aother=aUnder+number_entries(i); recTri(aother,nTri,nDo-nDo2,iBlock-nb,jBlock,aTri,diagonal+nDo2, work+nDo2,numberBlocks-nb); } else { int nb=number_blocks((nTri+1)>>1); int nTri2=number_rows(nb); longDouble * aother; int i; recTri(aUnder,nTri2,nDo,iBlock,jBlock,aTri,diagonal,work,numberBlocks); /* and rectangular update */ i=((numberBlocks-iBlock)*(numberBlocks-iBlock+1)- (numberBlocks-iBlock-nb)*(numberBlocks-iBlock-nb+1))>>1; aother=aTri+number_entries(nb); recRec(aUnder,nTri2,nTri-nTri2,nDo,aUnder+number_entries(nb),aother, diagonal,work,iBlock+nb,iBlock,jBlock,numberBlocks); recTri(aUnder+number_entries(nb),nTri-nTri2,nDo,iBlock+nb,jBlock, aTri+number_entries(i),diagonal,work,numberBlocks); } } #ifdef CHOL_USING_BLAS // using simple blas interface extern "C" { /** BLAS Fortran subroutine DGEMM. */ void F77_FUNC(dgemm,DGEMM)(char* transa, char* transb, ipfint *m, ipfint *n, ipfint *k, const double *alpha, const double *a, ipfint *lda, const double *b, ipfint *ldb, const double *beta, double *c, ipfint *ldc, int transa_len, int transb_len); } #endif /* Non leaf recursive rectangle rectangle update, nUnder is number of rows in iBlock, nUnderK is number of rows in kBlock */ void ClpCholeskyDense::recRec(longDouble * above, int nUnder, int nUnderK, int nDo, longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work, int kBlock,int iBlock, int jBlock, int numberBlocks) { #ifdef CHOL_USING_BLAS recRecLeaf(above , aUnder , aOther, diagonal,work, nUnderK); void ClpCholeskyDense::recRecLeaf(longDouble * above, longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work, int nUnder) aa = aOther-BLOCK; for (j = 0; j < BLOCK; j ++) { aa+=BLOCK; for (i=0;i< nUnderK;i++) { longDouble t00=aa[i+0*BLOCK]; for (k=0;k>1); int nUnder2=number_rows(nb); recRec(above,nUnder,nUnder2,nDo,aUnder,aOther,diagonal,work, kBlock,iBlock,jBlock,numberBlocks); recRec(above,nUnder,nUnderK-nUnder2,nDo,aUnder+number_entries(nb), aOther+number_entries(nb),diagonal,work,kBlock+nb,iBlock,jBlock,numberBlocks); } else if (nUnderK<=nDo&&nUnder<=nDo) { int nb=number_blocks((nDo+1)>>1); int nDo2=number_rows(nb); int i; recRec(above,nUnder,nUnderK,nDo2,aUnder,aOther,diagonal,work, kBlock,iBlock,jBlock,numberBlocks); i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)- (numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1; recRec(above+number_entries(i),nUnder,nUnderK,nDo-nDo2, aUnder+number_entries(i), aOther,diagonal+nDo2,work+nDo2, kBlock-nb,iBlock-nb,jBlock,numberBlocks-nb); } else { int nb=number_blocks((nUnder+1)>>1); int nUnder2=number_rows(nb); int i; recRec(above,nUnder2,nUnderK,nDo,aUnder,aOther,diagonal,work, kBlock,iBlock,jBlock,numberBlocks); i=((numberBlocks-iBlock)*(numberBlocks-iBlock-1)- (numberBlocks-iBlock-nb)*(numberBlocks-iBlock-nb-1))>>1; recRec(above+number_entries(nb),nUnder-nUnder2,nUnderK,nDo,aUnder, aOther+number_entries(i),diagonal,work,kBlock,iBlock+nb,jBlock,numberBlocks); } } // Leaf recursive factor void ClpCholeskyDense::factorLeaf(longDouble * a, int n, longDouble * diagonal, longDouble * work, int * rowsDropped) { longDouble largest=doubleParameters_[3]; longDouble smallest=doubleParameters_[4]; double dropValue = doubleParameters_[10]; int firstPositive=integerParameters_[34]; int rowOffset=diagonal-diagonal_; int numberDropped=0; int i, j, k; longDouble t00,temp1; longDouble * aa; aa = a-BLOCK; for (j = 0; j < n; j ++) { aa+=BLOCK; t00 = aa[j]; for (k = 0; k < j; ++k) { longDouble multiplier = work[k]; t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier; } bool dropColumn=false; longDouble useT00=t00; if (j+rowOffset=dropValue) { smallest = CoinMin(smallest,t00); largest = CoinMax(largest,t00); //aa[j]=t00; t00 = 1.0/t00; } else { dropColumn=true; //aa[j]=1.0e100; useT00=1.0e-100; t00=0.0; integerParameters_[20]++; } } if (!dropColumn) { diagonal[j]=t00; work[j]=useT00; temp1 = t00; for (i=j+1;i=0;iColumn--) { double value = region2[iColumn]*diagonal_[iColumn]; work -= addOffset; addOffset++; for (iRow=iColumn+1;iRow>BLOCKSHIFT; // later align on boundary longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks; int iBlock; longDouble * aa = a; int iColumn; for (iBlock=0;iBlocknumberRows_) { nChunk=numberRows_-iDo; } else { nChunk=BLOCK; } solveF1(aa,nChunk,region+iDo); for (jBlock=iBlock+1;jBlocknumberRows_) { nChunk=numberRows_-base; } else { nChunk=BLOCK; } solveF2(aa,nChunk,region+iDo,region+base); } aa+=BLOCKSQ; } // do diagonal outside for (iColumn=0;iColumn>1); aa = a+number_entries(offset-1); int lBase=(numberBlocks-1)*BLOCK; for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) { int nChunk; int jBlock; int triBase=iBlock*BLOCK; int iBase=lBase; for (jBlock=iBlock+1;jBlocknumberRows_) { nChunk=numberRows_-iBase; } else { nChunk=BLOCK; } solveB2(aa,nChunk,region+triBase,region+iBase); iBase-=BLOCK; aa-=BLOCKSQ; } if (triBase+BLOCK>numberRows_) { nChunk=numberRows_-triBase; } else { nChunk=BLOCK; } solveB1(aa,nChunk,region+triBase); aa-=BLOCKSQ; } #ifdef CHOL_COMPARE if (numberRows_<200) { for (int i=0;i8 t0 -= region[8] * a[0 + 8 * BLOCK]; t1 -= region[8] * a[1 + 8 * BLOCK]; t2 -= region[8] * a[2 + 8 * BLOCK]; t3 -= region[8] * a[3 + 8 * BLOCK]; t0 -= region[9] * a[0 + 9 * BLOCK]; t1 -= region[9] * a[1 + 9 * BLOCK]; t2 -= region[9] * a[2 + 9 * BLOCK]; t3 -= region[9] * a[3 + 9 * BLOCK]; t0 -= region[10] * a[0 + 10 * BLOCK]; t1 -= region[10] * a[1 + 10 * BLOCK]; t2 -= region[10] * a[2 + 10 * BLOCK]; t3 -= region[10] * a[3 + 10 * BLOCK]; t0 -= region[11] * a[0 + 11 * BLOCK]; t1 -= region[11] * a[1 + 11 * BLOCK]; t2 -= region[11] * a[2 + 11 * BLOCK]; t3 -= region[11] * a[3 + 11 * BLOCK]; t0 -= region[12] * a[0 + 12 * BLOCK]; t1 -= region[12] * a[1 + 12 * BLOCK]; t2 -= region[12] * a[2 + 12 * BLOCK]; t3 -= region[12] * a[3 + 12 * BLOCK]; t0 -= region[13] * a[0 + 13 * BLOCK]; t1 -= region[13] * a[1 + 13 * BLOCK]; t2 -= region[13] * a[2 + 13 * BLOCK]; t3 -= region[13] * a[3 + 13 * BLOCK]; t0 -= region[14] * a[0 + 14 * BLOCK]; t1 -= region[14] * a[1 + 14 * BLOCK]; t2 -= region[14] * a[2 + 14 * BLOCK]; t3 -= region[14] * a[3 + 14 * BLOCK]; t0 -= region[15] * a[0 + 15 * BLOCK]; t1 -= region[15] * a[1 + 15 * BLOCK]; t2 -= region[15] * a[2 + 15 * BLOCK]; t3 -= region[15] * a[3 + 15 * BLOCK]; #endif region2[0] = t0; region2[1] = t1; region2[2] = t2; region2[3] = t3; region2+=4; a+=4; } } else { #endif for (k = 0; k < n; ++k) { longDouble t00 = region2[k]; for (j = 0; j < BLOCK; j ++) { t00 -= region[j] * a[k + j * BLOCK]; } region2[k] = t00; } #if BLOCK==16 } #endif } // Backward part of solve 1 void ClpCholeskyDense::solveB1(longDouble * a,int n,double * region) { int j, k; longDouble t00; for (j = n-1; j >=0; j --) { t00 = region[j]; for (k = j+1; k < n; ++k) { t00 -= region[k] * a[k + j * BLOCK]; } //t00*=a[j + j * BLOCK]; region[j] = t00; } } // Backward part of solve 2 void ClpCholeskyDense::solveB2(longDouble * a,int n,double * region, double * region2) { int j, k; #if BLOCK==16 if (n==BLOCK) { for (j = 0; j < BLOCK; j +=4) { longDouble t0 = region[0]; longDouble t1 = region[1]; longDouble t2 = region[2]; longDouble t3 = region[3]; t0 -= region2[0] * a[0 + 0*BLOCK]; t1 -= region2[0] * a[0 + 1*BLOCK]; t2 -= region2[0] * a[0 + 2*BLOCK]; t3 -= region2[0] * a[0 + 3*BLOCK]; t0 -= region2[1] * a[1 + 0*BLOCK]; t1 -= region2[1] * a[1 + 1*BLOCK]; t2 -= region2[1] * a[1 + 2*BLOCK]; t3 -= region2[1] * a[1 + 3*BLOCK]; t0 -= region2[2] * a[2 + 0*BLOCK]; t1 -= region2[2] * a[2 + 1*BLOCK]; t2 -= region2[2] * a[2 + 2*BLOCK]; t3 -= region2[2] * a[2 + 3*BLOCK]; t0 -= region2[3] * a[3 + 0*BLOCK]; t1 -= region2[3] * a[3 + 1*BLOCK]; t2 -= region2[3] * a[3 + 2*BLOCK]; t3 -= region2[3] * a[3 + 3*BLOCK]; t0 -= region2[4] * a[4 + 0*BLOCK]; t1 -= region2[4] * a[4 + 1*BLOCK]; t2 -= region2[4] * a[4 + 2*BLOCK]; t3 -= region2[4] * a[4 + 3*BLOCK]; t0 -= region2[5] * a[5 + 0*BLOCK]; t1 -= region2[5] * a[5 + 1*BLOCK]; t2 -= region2[5] * a[5 + 2*BLOCK]; t3 -= region2[5] * a[5 + 3*BLOCK]; t0 -= region2[6] * a[6 + 0*BLOCK]; t1 -= region2[6] * a[6 + 1*BLOCK]; t2 -= region2[6] * a[6 + 2*BLOCK]; t3 -= region2[6] * a[6 + 3*BLOCK]; t0 -= region2[7] * a[7 + 0*BLOCK]; t1 -= region2[7] * a[7 + 1*BLOCK]; t2 -= region2[7] * a[7 + 2*BLOCK]; t3 -= region2[7] * a[7 + 3*BLOCK]; #if BLOCK>8 t0 -= region2[8] * a[8 + 0*BLOCK]; t1 -= region2[8] * a[8 + 1*BLOCK]; t2 -= region2[8] * a[8 + 2*BLOCK]; t3 -= region2[8] * a[8 + 3*BLOCK]; t0 -= region2[9] * a[9 + 0*BLOCK]; t1 -= region2[9] * a[9 + 1*BLOCK]; t2 -= region2[9] * a[9 + 2*BLOCK]; t3 -= region2[9] * a[9 + 3*BLOCK]; t0 -= region2[10] * a[10 + 0*BLOCK]; t1 -= region2[10] * a[10 + 1*BLOCK]; t2 -= region2[10] * a[10 + 2*BLOCK]; t3 -= region2[10] * a[10 + 3*BLOCK]; t0 -= region2[11] * a[11 + 0*BLOCK]; t1 -= region2[11] * a[11 + 1*BLOCK]; t2 -= region2[11] * a[11 + 2*BLOCK]; t3 -= region2[11] * a[11 + 3*BLOCK]; t0 -= region2[12] * a[12 + 0*BLOCK]; t1 -= region2[12] * a[12 + 1*BLOCK]; t2 -= region2[12] * a[12 + 2*BLOCK]; t3 -= region2[12] * a[12 + 3*BLOCK]; t0 -= region2[13] * a[13 + 0*BLOCK]; t1 -= region2[13] * a[13 + 1*BLOCK]; t2 -= region2[13] * a[13 + 2*BLOCK]; t3 -= region2[13] * a[13 + 3*BLOCK]; t0 -= region2[14] * a[14 + 0*BLOCK]; t1 -= region2[14] * a[14 + 1*BLOCK]; t2 -= region2[14] * a[14 + 2*BLOCK]; t3 -= region2[14] * a[14 + 3*BLOCK]; t0 -= region2[15] * a[15 + 0*BLOCK]; t1 -= region2[15] * a[15 + 1*BLOCK]; t2 -= region2[15] * a[15 + 2*BLOCK]; t3 -= region2[15] * a[15 + 3*BLOCK]; #endif region[0] = t0; region[1] = t1; region[2] = t2; region[3] = t3; a+=4*BLOCK; region+=4; } } else { #endif for (j = 0; j < BLOCK; j ++) { longDouble t00 = region[j]; for (k = 0; k < n; ++k) { t00 -= region2[k] * a[k + j * BLOCK]; } region[j] = t00; } #if BLOCK==16 } #endif } /* Uses factorization to solve. */ void ClpCholeskyDense::solveLong (longDouble * region) { int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT; // later align on boundary longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks; int iBlock; longDouble * aa = a; int iColumn; for (iBlock=0;iBlocknumberRows_) { nChunk=numberRows_-iDo; } else { nChunk=BLOCK; } solveF1Long(aa,nChunk,region+iDo); for (jBlock=iBlock+1;jBlocknumberRows_) { nChunk=numberRows_-base; } else { nChunk=BLOCK; } solveF2Long(aa,nChunk,region+iDo,region+base); } aa+=BLOCKSQ; } // do diagonal outside for (iColumn=0;iColumn>1); aa = a+number_entries(offset-1); int lBase=(numberBlocks-1)*BLOCK; for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) { int nChunk; int jBlock; int triBase=iBlock*BLOCK; int iBase=lBase; for (jBlock=iBlock+1;jBlocknumberRows_) { nChunk=numberRows_-iBase; } else { nChunk=BLOCK; } solveB2Long(aa,nChunk,region+triBase,region+iBase); iBase-=BLOCK; aa-=BLOCKSQ; } if (triBase+BLOCK>numberRows_) { nChunk=numberRows_-triBase; } else { nChunk=BLOCK; } solveB1Long(aa,nChunk,region+triBase); aa-=BLOCKSQ; } } // Forward part of solve 1 void ClpCholeskyDense::solveF1Long(longDouble * a,int n,longDouble * region) { int j, k; longDouble t00; for (j = 0; j < n; j ++) { t00 = region[j]; for (k = 0; k < j; ++k) { t00 -= region[k] * a[j + k * BLOCK]; } //t00*=a[j + j * BLOCK]; region[j] = t00; } } // Forward part of solve 2 void ClpCholeskyDense::solveF2Long(longDouble * a,int n,longDouble * region, longDouble * region2) { int j, k; #if BLOCK==16 if (n==BLOCK) { for (k = 0; k < BLOCK; k+=4) { longDouble t0 = region2[0]; longDouble t1 = region2[1]; longDouble t2 = region2[2]; longDouble t3 = region2[3]; t0 -= region[0] * a[0 + 0 * BLOCK]; t1 -= region[0] * a[1 + 0 * BLOCK]; t2 -= region[0] * a[2 + 0 * BLOCK]; t3 -= region[0] * a[3 + 0 * BLOCK]; t0 -= region[1] * a[0 + 1 * BLOCK]; t1 -= region[1] * a[1 + 1 * BLOCK]; t2 -= region[1] * a[2 + 1 * BLOCK]; t3 -= region[1] * a[3 + 1 * BLOCK]; t0 -= region[2] * a[0 + 2 * BLOCK]; t1 -= region[2] * a[1 + 2 * BLOCK]; t2 -= region[2] * a[2 + 2 * BLOCK]; t3 -= region[2] * a[3 + 2 * BLOCK]; t0 -= region[3] * a[0 + 3 * BLOCK]; t1 -= region[3] * a[1 + 3 * BLOCK]; t2 -= region[3] * a[2 + 3 * BLOCK]; t3 -= region[3] * a[3 + 3 * BLOCK]; t0 -= region[4] * a[0 + 4 * BLOCK]; t1 -= region[4] * a[1 + 4 * BLOCK]; t2 -= region[4] * a[2 + 4 * BLOCK]; t3 -= region[4] * a[3 + 4 * BLOCK]; t0 -= region[5] * a[0 + 5 * BLOCK]; t1 -= region[5] * a[1 + 5 * BLOCK]; t2 -= region[5] * a[2 + 5 * BLOCK]; t3 -= region[5] * a[3 + 5 * BLOCK]; t0 -= region[6] * a[0 + 6 * BLOCK]; t1 -= region[6] * a[1 + 6 * BLOCK]; t2 -= region[6] * a[2 + 6 * BLOCK]; t3 -= region[6] * a[3 + 6 * BLOCK]; t0 -= region[7] * a[0 + 7 * BLOCK]; t1 -= region[7] * a[1 + 7 * BLOCK]; t2 -= region[7] * a[2 + 7 * BLOCK]; t3 -= region[7] * a[3 + 7 * BLOCK]; #if BLOCK>8 t0 -= region[8] * a[0 + 8 * BLOCK]; t1 -= region[8] * a[1 + 8 * BLOCK]; t2 -= region[8] * a[2 + 8 * BLOCK]; t3 -= region[8] * a[3 + 8 * BLOCK]; t0 -= region[9] * a[0 + 9 * BLOCK]; t1 -= region[9] * a[1 + 9 * BLOCK]; t2 -= region[9] * a[2 + 9 * BLOCK]; t3 -= region[9] * a[3 + 9 * BLOCK]; t0 -= region[10] * a[0 + 10 * BLOCK]; t1 -= region[10] * a[1 + 10 * BLOCK]; t2 -= region[10] * a[2 + 10 * BLOCK]; t3 -= region[10] * a[3 + 10 * BLOCK]; t0 -= region[11] * a[0 + 11 * BLOCK]; t1 -= region[11] * a[1 + 11 * BLOCK]; t2 -= region[11] * a[2 + 11 * BLOCK]; t3 -= region[11] * a[3 + 11 * BLOCK]; t0 -= region[12] * a[0 + 12 * BLOCK]; t1 -= region[12] * a[1 + 12 * BLOCK]; t2 -= region[12] * a[2 + 12 * BLOCK]; t3 -= region[12] * a[3 + 12 * BLOCK]; t0 -= region[13] * a[0 + 13 * BLOCK]; t1 -= region[13] * a[1 + 13 * BLOCK]; t2 -= region[13] * a[2 + 13 * BLOCK]; t3 -= region[13] * a[3 + 13 * BLOCK]; t0 -= region[14] * a[0 + 14 * BLOCK]; t1 -= region[14] * a[1 + 14 * BLOCK]; t2 -= region[14] * a[2 + 14 * BLOCK]; t3 -= region[14] * a[3 + 14 * BLOCK]; t0 -= region[15] * a[0 + 15 * BLOCK]; t1 -= region[15] * a[1 + 15 * BLOCK]; t2 -= region[15] * a[2 + 15 * BLOCK]; t3 -= region[15] * a[3 + 15 * BLOCK]; #endif region2[0] = t0; region2[1] = t1; region2[2] = t2; region2[3] = t3; region2+=4; a+=4; } } else { #endif for (k = 0; k < n; ++k) { longDouble t00 = region2[k]; for (j = 0; j < BLOCK; j ++) { t00 -= region[j] * a[k + j * BLOCK]; } region2[k] = t00; } #if BLOCK==16 } #endif } // Backward part of solve 1 void ClpCholeskyDense::solveB1Long(longDouble * a,int n,longDouble * region) { int j, k; longDouble t00; for (j = n-1; j >=0; j --) { t00 = region[j]; for (k = j+1; k < n; ++k) { t00 -= region[k] * a[k + j * BLOCK]; } //t00*=a[j + j * BLOCK]; region[j] = t00; } } // Backward part of solve 2 void ClpCholeskyDense::solveB2Long(longDouble * a,int n,longDouble * region, longDouble * region2) { int j, k; #if BLOCK==16 if (n==BLOCK) { for (j = 0; j < BLOCK; j +=4) { longDouble t0 = region[0]; longDouble t1 = region[1]; longDouble t2 = region[2]; longDouble t3 = region[3]; t0 -= region2[0] * a[0 + 0*BLOCK]; t1 -= region2[0] * a[0 + 1*BLOCK]; t2 -= region2[0] * a[0 + 2*BLOCK]; t3 -= region2[0] * a[0 + 3*BLOCK]; t0 -= region2[1] * a[1 + 0*BLOCK]; t1 -= region2[1] * a[1 + 1*BLOCK]; t2 -= region2[1] * a[1 + 2*BLOCK]; t3 -= region2[1] * a[1 + 3*BLOCK]; t0 -= region2[2] * a[2 + 0*BLOCK]; t1 -= region2[2] * a[2 + 1*BLOCK]; t2 -= region2[2] * a[2 + 2*BLOCK]; t3 -= region2[2] * a[2 + 3*BLOCK]; t0 -= region2[3] * a[3 + 0*BLOCK]; t1 -= region2[3] * a[3 + 1*BLOCK]; t2 -= region2[3] * a[3 + 2*BLOCK]; t3 -= region2[3] * a[3 + 3*BLOCK]; t0 -= region2[4] * a[4 + 0*BLOCK]; t1 -= region2[4] * a[4 + 1*BLOCK]; t2 -= region2[4] * a[4 + 2*BLOCK]; t3 -= region2[4] * a[4 + 3*BLOCK]; t0 -= region2[5] * a[5 + 0*BLOCK]; t1 -= region2[5] * a[5 + 1*BLOCK]; t2 -= region2[5] * a[5 + 2*BLOCK]; t3 -= region2[5] * a[5 + 3*BLOCK]; t0 -= region2[6] * a[6 + 0*BLOCK]; t1 -= region2[6] * a[6 + 1*BLOCK]; t2 -= region2[6] * a[6 + 2*BLOCK]; t3 -= region2[6] * a[6 + 3*BLOCK]; t0 -= region2[7] * a[7 + 0*BLOCK]; t1 -= region2[7] * a[7 + 1*BLOCK]; t2 -= region2[7] * a[7 + 2*BLOCK]; t3 -= region2[7] * a[7 + 3*BLOCK]; #if BLOCK>8 t0 -= region2[8] * a[8 + 0*BLOCK]; t1 -= region2[8] * a[8 + 1*BLOCK]; t2 -= region2[8] * a[8 + 2*BLOCK]; t3 -= region2[8] * a[8 + 3*BLOCK]; t0 -= region2[9] * a[9 + 0*BLOCK]; t1 -= region2[9] * a[9 + 1*BLOCK]; t2 -= region2[9] * a[9 + 2*BLOCK]; t3 -= region2[9] * a[9 + 3*BLOCK]; t0 -= region2[10] * a[10 + 0*BLOCK]; t1 -= region2[10] * a[10 + 1*BLOCK]; t2 -= region2[10] * a[10 + 2*BLOCK]; t3 -= region2[10] * a[10 + 3*BLOCK]; t0 -= region2[11] * a[11 + 0*BLOCK]; t1 -= region2[11] * a[11 + 1*BLOCK]; t2 -= region2[11] * a[11 + 2*BLOCK]; t3 -= region2[11] * a[11 + 3*BLOCK]; t0 -= region2[12] * a[12 + 0*BLOCK]; t1 -= region2[12] * a[12 + 1*BLOCK]; t2 -= region2[12] * a[12 + 2*BLOCK]; t3 -= region2[12] * a[12 + 3*BLOCK]; t0 -= region2[13] * a[13 + 0*BLOCK]; t1 -= region2[13] * a[13 + 1*BLOCK]; t2 -= region2[13] * a[13 + 2*BLOCK]; t3 -= region2[13] * a[13 + 3*BLOCK]; t0 -= region2[14] * a[14 + 0*BLOCK]; t1 -= region2[14] * a[14 + 1*BLOCK]; t2 -= region2[14] * a[14 + 2*BLOCK]; t3 -= region2[14] * a[14 + 3*BLOCK]; t0 -= region2[15] * a[15 + 0*BLOCK]; t1 -= region2[15] * a[15 + 1*BLOCK]; t2 -= region2[15] * a[15 + 2*BLOCK]; t3 -= region2[15] * a[15 + 3*BLOCK]; #endif region[0] = t0; region[1] = t1; region[2] = t2; region[3] = t3; a+=4*BLOCK; region+=4; } } else { #endif for (j = 0; j < BLOCK; j ++) { longDouble t00 = region[j]; for (k = 0; k < n; ++k) { t00 -= region2[k] * a[k + j * BLOCK]; } region[j] = t00; } #if BLOCK==16 } #endif } /* Uses factorization to solve. */ void ClpCholeskyDense::solveLongWork (longWork * region) { int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT; // later align on boundary longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks; int iBlock; longDouble * aa = a; int iColumn; for (iBlock=0;iBlocknumberRows_) { nChunk=numberRows_-iDo; } else { nChunk=BLOCK; } solveF1LongWork(aa,nChunk,region+iDo); for (jBlock=iBlock+1;jBlocknumberRows_) { nChunk=numberRows_-base; } else { nChunk=BLOCK; } solveF2LongWork(aa,nChunk,region+iDo,region+base); } aa+=BLOCKSQ; } // do diagonal outside for (iColumn=0;iColumn>1); aa = a+number_entries(offset-1); int lBase=(numberBlocks-1)*BLOCK; for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) { int nChunk; int jBlock; int triBase=iBlock*BLOCK; int iBase=lBase; for (jBlock=iBlock+1;jBlocknumberRows_) { nChunk=numberRows_-iBase; } else { nChunk=BLOCK; } solveB2LongWork(aa,nChunk,region+triBase,region+iBase); iBase-=BLOCK; aa-=BLOCKSQ; } if (triBase+BLOCK>numberRows_) { nChunk=numberRows_-triBase; } else { nChunk=BLOCK; } solveB1LongWork(aa,nChunk,region+triBase); aa-=BLOCKSQ; } } // Forward part of solve 1 void ClpCholeskyDense::solveF1LongWork(longDouble * a,int n,longWork * region) { int j, k; longWork t00; for (j = 0; j < n; j ++) { t00 = region[j]; for (k = 0; k < j; ++k) { t00 -= region[k] * a[j + k * BLOCK]; } //t00*=a[j + j * BLOCK]; region[j] = t00; } } // Forward part of solve 2 void ClpCholeskyDense::solveF2LongWork(longDouble * a,int n,longWork * region, longWork * region2) { int j, k; #if BLOCK==16 if (n==BLOCK) { for (k = 0; k < BLOCK; k+=4) { longWork t0 = region2[0]; longWork t1 = region2[1]; longWork t2 = region2[2]; longWork t3 = region2[3]; t0 -= region[0] * a[0 + 0 * BLOCK]; t1 -= region[0] * a[1 + 0 * BLOCK]; t2 -= region[0] * a[2 + 0 * BLOCK]; t3 -= region[0] * a[3 + 0 * BLOCK]; t0 -= region[1] * a[0 + 1 * BLOCK]; t1 -= region[1] * a[1 + 1 * BLOCK]; t2 -= region[1] * a[2 + 1 * BLOCK]; t3 -= region[1] * a[3 + 1 * BLOCK]; t0 -= region[2] * a[0 + 2 * BLOCK]; t1 -= region[2] * a[1 + 2 * BLOCK]; t2 -= region[2] * a[2 + 2 * BLOCK]; t3 -= region[2] * a[3 + 2 * BLOCK]; t0 -= region[3] * a[0 + 3 * BLOCK]; t1 -= region[3] * a[1 + 3 * BLOCK]; t2 -= region[3] * a[2 + 3 * BLOCK]; t3 -= region[3] * a[3 + 3 * BLOCK]; t0 -= region[4] * a[0 + 4 * BLOCK]; t1 -= region[4] * a[1 + 4 * BLOCK]; t2 -= region[4] * a[2 + 4 * BLOCK]; t3 -= region[4] * a[3 + 4 * BLOCK]; t0 -= region[5] * a[0 + 5 * BLOCK]; t1 -= region[5] * a[1 + 5 * BLOCK]; t2 -= region[5] * a[2 + 5 * BLOCK]; t3 -= region[5] * a[3 + 5 * BLOCK]; t0 -= region[6] * a[0 + 6 * BLOCK]; t1 -= region[6] * a[1 + 6 * BLOCK]; t2 -= region[6] * a[2 + 6 * BLOCK]; t3 -= region[6] * a[3 + 6 * BLOCK]; t0 -= region[7] * a[0 + 7 * BLOCK]; t1 -= region[7] * a[1 + 7 * BLOCK]; t2 -= region[7] * a[2 + 7 * BLOCK]; t3 -= region[7] * a[3 + 7 * BLOCK]; #if BLOCK>8 t0 -= region[8] * a[0 + 8 * BLOCK]; t1 -= region[8] * a[1 + 8 * BLOCK]; t2 -= region[8] * a[2 + 8 * BLOCK]; t3 -= region[8] * a[3 + 8 * BLOCK]; t0 -= region[9] * a[0 + 9 * BLOCK]; t1 -= region[9] * a[1 + 9 * BLOCK]; t2 -= region[9] * a[2 + 9 * BLOCK]; t3 -= region[9] * a[3 + 9 * BLOCK]; t0 -= region[10] * a[0 + 10 * BLOCK]; t1 -= region[10] * a[1 + 10 * BLOCK]; t2 -= region[10] * a[2 + 10 * BLOCK]; t3 -= region[10] * a[3 + 10 * BLOCK]; t0 -= region[11] * a[0 + 11 * BLOCK]; t1 -= region[11] * a[1 + 11 * BLOCK]; t2 -= region[11] * a[2 + 11 * BLOCK]; t3 -= region[11] * a[3 + 11 * BLOCK]; t0 -= region[12] * a[0 + 12 * BLOCK]; t1 -= region[12] * a[1 + 12 * BLOCK]; t2 -= region[12] * a[2 + 12 * BLOCK]; t3 -= region[12] * a[3 + 12 * BLOCK]; t0 -= region[13] * a[0 + 13 * BLOCK]; t1 -= region[13] * a[1 + 13 * BLOCK]; t2 -= region[13] * a[2 + 13 * BLOCK]; t3 -= region[13] * a[3 + 13 * BLOCK]; t0 -= region[14] * a[0 + 14 * BLOCK]; t1 -= region[14] * a[1 + 14 * BLOCK]; t2 -= region[14] * a[2 + 14 * BLOCK]; t3 -= region[14] * a[3 + 14 * BLOCK]; t0 -= region[15] * a[0 + 15 * BLOCK]; t1 -= region[15] * a[1 + 15 * BLOCK]; t2 -= region[15] * a[2 + 15 * BLOCK]; t3 -= region[15] * a[3 + 15 * BLOCK]; #endif region2[0] = t0; region2[1] = t1; region2[2] = t2; region2[3] = t3; region2+=4; a+=4; } } else { #endif for (k = 0; k < n; ++k) { longWork t00 = region2[k]; for (j = 0; j < BLOCK; j ++) { t00 -= region[j] * a[k + j * BLOCK]; } region2[k] = t00; } #if BLOCK==16 } #endif } // Backward part of solve 1 void ClpCholeskyDense::solveB1LongWork(longDouble * a,int n,longWork * region) { int j, k; longWork t00; for (j = n-1; j >=0; j --) { t00 = region[j]; for (k = j+1; k < n; ++k) { t00 -= region[k] * a[k + j * BLOCK]; } //t00*=a[j + j * BLOCK]; region[j] = t00; } } // Backward part of solve 2 void ClpCholeskyDense::solveB2LongWork(longDouble * a,int n,longWork * region, longWork * region2) { int j, k; #if BLOCK==16 if (n==BLOCK) { for (j = 0; j < BLOCK; j +=4) { longWork t0 = region[0]; longWork t1 = region[1]; longWork t2 = region[2]; longWork t3 = region[3]; t0 -= region2[0] * a[0 + 0*BLOCK]; t1 -= region2[0] * a[0 + 1*BLOCK]; t2 -= region2[0] * a[0 + 2*BLOCK]; t3 -= region2[0] * a[0 + 3*BLOCK]; t0 -= region2[1] * a[1 + 0*BLOCK]; t1 -= region2[1] * a[1 + 1*BLOCK]; t2 -= region2[1] * a[1 + 2*BLOCK]; t3 -= region2[1] * a[1 + 3*BLOCK]; t0 -= region2[2] * a[2 + 0*BLOCK]; t1 -= region2[2] * a[2 + 1*BLOCK]; t2 -= region2[2] * a[2 + 2*BLOCK]; t3 -= region2[2] * a[2 + 3*BLOCK]; t0 -= region2[3] * a[3 + 0*BLOCK]; t1 -= region2[3] * a[3 + 1*BLOCK]; t2 -= region2[3] * a[3 + 2*BLOCK]; t3 -= region2[3] * a[3 + 3*BLOCK]; t0 -= region2[4] * a[4 + 0*BLOCK]; t1 -= region2[4] * a[4 + 1*BLOCK]; t2 -= region2[4] * a[4 + 2*BLOCK]; t3 -= region2[4] * a[4 + 3*BLOCK]; t0 -= region2[5] * a[5 + 0*BLOCK]; t1 -= region2[5] * a[5 + 1*BLOCK]; t2 -= region2[5] * a[5 + 2*BLOCK]; t3 -= region2[5] * a[5 + 3*BLOCK]; t0 -= region2[6] * a[6 + 0*BLOCK]; t1 -= region2[6] * a[6 + 1*BLOCK]; t2 -= region2[6] * a[6 + 2*BLOCK]; t3 -= region2[6] * a[6 + 3*BLOCK]; t0 -= region2[7] * a[7 + 0*BLOCK]; t1 -= region2[7] * a[7 + 1*BLOCK]; t2 -= region2[7] * a[7 + 2*BLOCK]; t3 -= region2[7] * a[7 + 3*BLOCK]; #if BLOCK>8 t0 -= region2[8] * a[8 + 0*BLOCK]; t1 -= region2[8] * a[8 + 1*BLOCK]; t2 -= region2[8] * a[8 + 2*BLOCK]; t3 -= region2[8] * a[8 + 3*BLOCK]; t0 -= region2[9] * a[9 + 0*BLOCK]; t1 -= region2[9] * a[9 + 1*BLOCK]; t2 -= region2[9] * a[9 + 2*BLOCK]; t3 -= region2[9] * a[9 + 3*BLOCK]; t0 -= region2[10] * a[10 + 0*BLOCK]; t1 -= region2[10] * a[10 + 1*BLOCK]; t2 -= region2[10] * a[10 + 2*BLOCK]; t3 -= region2[10] * a[10 + 3*BLOCK]; t0 -= region2[11] * a[11 + 0*BLOCK]; t1 -= region2[11] * a[11 + 1*BLOCK]; t2 -= region2[11] * a[11 + 2*BLOCK]; t3 -= region2[11] * a[11 + 3*BLOCK]; t0 -= region2[12] * a[12 + 0*BLOCK]; t1 -= region2[12] * a[12 + 1*BLOCK]; t2 -= region2[12] * a[12 + 2*BLOCK]; t3 -= region2[12] * a[12 + 3*BLOCK]; t0 -= region2[13] * a[13 + 0*BLOCK]; t1 -= region2[13] * a[13 + 1*BLOCK]; t2 -= region2[13] * a[13 + 2*BLOCK]; t3 -= region2[13] * a[13 + 3*BLOCK]; t0 -= region2[14] * a[14 + 0*BLOCK]; t1 -= region2[14] * a[14 + 1*BLOCK]; t2 -= region2[14] * a[14 + 2*BLOCK]; t3 -= region2[14] * a[14 + 3*BLOCK]; t0 -= region2[15] * a[15 + 0*BLOCK]; t1 -= region2[15] * a[15 + 1*BLOCK]; t2 -= region2[15] * a[15 + 2*BLOCK]; t3 -= region2[15] * a[15 + 3*BLOCK]; #endif region[0] = t0; region[1] = t1; region[2] = t2; region[3] = t3; a+=4*BLOCK; region+=4; } } else { #endif for (j = 0; j < BLOCK; j ++) { longWork t00 = region[j]; for (k = 0; k < n; ++k) { t00 -= region2[k] * a[k + j * BLOCK]; } region[j] = t00; } #if BLOCK==16 } #endif }