// Copyright (C) 2003, International Business Machines
// Corporation and others. All Rights Reserved.
#include "CoinPragma.hpp"
#include "CoinHelperFunctions.hpp"
#include "ClpInterior.hpp"
#include "ClpCholeskyDense.hpp"
#include "ClpMessage.hpp"
#include "ClpQuadraticObjective.hpp"
//#############################################################################
// Constructors / Destructor / Assignment
//#############################################################################
//-------------------------------------------------------------------
// Default Constructor
//-------------------------------------------------------------------
ClpCholeskyDense::ClpCholeskyDense ()
: ClpCholeskyBase(),
borrowSpace_(false)
{
type_=11;;
}
//-------------------------------------------------------------------
// Copy constructor
//-------------------------------------------------------------------
ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs)
: ClpCholeskyBase(rhs),
borrowSpace_(rhs.borrowSpace_)
{
assert(!rhs.borrowSpace_||!rhs.sizeFactor_); // can't do if borrowing space
}
//-------------------------------------------------------------------
// Destructor
//-------------------------------------------------------------------
ClpCholeskyDense::~ClpCholeskyDense ()
{
if (borrowSpace_) {
// set NULL
sparseFactor_=NULL;
workDouble_=NULL;
diagonal_=NULL;
}
}
//----------------------------------------------------------------
// Assignment operator
//-------------------------------------------------------------------
ClpCholeskyDense &
ClpCholeskyDense::operator=(const ClpCholeskyDense& rhs)
{
if (this != &rhs) {
assert(!rhs.borrowSpace_||!rhs.sizeFactor_); // can't do if borrowing space
ClpCholeskyBase::operator=(rhs);
borrowSpace_=rhs.borrowSpace_;
}
return *this;
}
//-------------------------------------------------------------------
// Clone
//-------------------------------------------------------------------
ClpCholeskyBase * ClpCholeskyDense::clone() const
{
return new ClpCholeskyDense(*this);
}
// If not power of 2 then need to redo a bit
#define BLOCK 16
#define BLOCKSHIFT 4
#define BLOCKSQ ( BLOCK*BLOCK )
#define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT )
#define number_blocks(x) (((x)+BLOCK-1)>>BLOCKSHIFT)
#define number_rows(x) ((x)<<BLOCKSHIFT)
#define number_entries(x) ((x)<<BLOCKSQSHIFT)
/* Gets space */
int
ClpCholeskyDense::reserveSpace(const ClpCholeskyBase * factor, int numberRows)
{
numberRows_ = numberRows;
int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
// allow one stripe extra
numberBlocks = numberBlocks + ((numberBlocks*(numberBlocks+1))/2);
sizeFactor_=numberBlocks*BLOCKSQ;
//#define CHOL_COMPARE
#ifdef CHOL_COMPARE
sizeFactor_ += 95000;
#endif
if (!factor) {
sparseFactor_ = new longDouble [sizeFactor_];
rowsDropped_ = new char [numberRows_];
memset(rowsDropped_,0,numberRows_);
workDouble_ = new longDouble[numberRows_];
diagonal_ = new longDouble[numberRows_];
} else {
borrowSpace_=true;
int numberFull = factor->numberRows();
sparseFactor_ = factor->sparseFactor()+(factor->size()-sizeFactor_);
workDouble_ = factor->workDouble() + (numberFull-numberRows_);
diagonal_ = factor->diagonal() + (numberFull-numberRows_);
}
numberRowsDropped_=0;
return 0;
}
/* Returns space needed */
CoinBigIndex
ClpCholeskyDense::space( int numberRows) const
{
int numberBlocks = (numberRows+BLOCK-1)>>BLOCKSHIFT;
// allow one stripe extra
numberBlocks = numberBlocks + ((numberBlocks*(numberBlocks+1))/2);
CoinBigIndex sizeFactor=numberBlocks*BLOCKSQ;
#ifdef CHOL_COMPARE
sizeFactor += 95000;
#endif
return sizeFactor;
}
/* Orders rows and saves pointer to matrix.and model */
int
ClpCholeskyDense::order(ClpInterior * model)
{
model_=model;
int numberRows;
int numberRowsModel = model_->numberRows();
int numberColumns = model_->numberColumns();
if (!doKKT_) {
numberRows = numberRowsModel;
} else {
numberRows = 2*numberRowsModel+numberColumns;
}
reserveSpace(NULL,numberRows);
rowCopy_ = model->clpMatrix()->reverseOrderedCopy();
return 0;
}
/* Does Symbolic factorization given permutation.
This is called immediately after order. If user provides this then
user must provide factorize and solve. Otherwise the default factorization is used
returns non-zero if not enough memory */
int
ClpCholeskyDense::symbolic()
{
return 0;
}
/* Factorize - filling in rowsDropped and returning number dropped */
int
ClpCholeskyDense::factorize(const double * diagonal, int * rowsDropped)
{
assert (!borrowSpace_);
const CoinBigIndex * columnStart = model_->clpMatrix()->getVectorStarts();
const int * columnLength = model_->clpMatrix()->getVectorLengths();
const int * row = model_->clpMatrix()->getIndices();
const double * element = model_->clpMatrix()->getElements();
const CoinBigIndex * rowStart = rowCopy_->getVectorStarts();
const int * rowLength = rowCopy_->getVectorLengths();
const int * column = rowCopy_->getIndices();
const double * elementByRow = rowCopy_->getElements();
int numberColumns=model_->clpMatrix()->getNumCols();
CoinZeroN(sparseFactor_,sizeFactor_);
//perturbation
longDouble perturbation=model_->diagonalPerturbation()*model_->diagonalNorm();
perturbation=perturbation*perturbation;
if (perturbation>1.0) {
#ifdef COIN_DEVELOP
//if (model_->model()->logLevel()&4)
std::cout <<"large perturbation "<<perturbation<<std::endl;
#endif
perturbation=sqrt(perturbation);;
perturbation=1.0;
}
int iRow;
int newDropped=0;
double largest=1.0;
double smallest=COIN_DBL_MAX;
longDouble delta2 = model_->delta(); // add delta*delta to diagonal
delta2 *= delta2;
if (!doKKT_) {
longDouble * work = sparseFactor_;
work--; // skip diagonal
int addOffset=numberRows_-1;
const double * diagonalSlack = diagonal + numberColumns;
// largest in initial matrix
double largest2=1.0e-20;
for (iRow=0;iRow<numberRows_;iRow++) {
if (!rowsDropped_[iRow]) {
CoinBigIndex startRow=rowStart[iRow];
CoinBigIndex endRow=rowStart[iRow]+rowLength[iRow];
longDouble diagonalValue = diagonalSlack[iRow]+delta2;
for (CoinBigIndex k=startRow;k<endRow;k++) {
int iColumn=column[k];
CoinBigIndex start=columnStart[iColumn];
CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn];
longDouble multiplier = diagonal[iColumn]*elementByRow[k];
for (CoinBigIndex j=start;j<end;j++) {
int jRow=row[j];
if (!rowsDropped_[jRow]) {
if (jRow>iRow) {
longDouble value=element[j]*multiplier;
work[jRow] += value;
} else if (jRow==iRow) {
longDouble value=element[j]*multiplier;
diagonalValue += value;
}
}
}
}
for (int j=iRow+1;j<numberRows_;j++)
largest2 = CoinMax(largest2,fabs(work[j]));
diagonal_[iRow]=diagonalValue;
largest2 = CoinMax(largest2,fabs(diagonalValue));
} else {
// dropped
diagonal_[iRow]=1.0;
}
addOffset--;
work += addOffset;
}
//check sizes
largest2*=1.0e-20;
largest = CoinMin(largest2,1.0e-11);
int numberDroppedBefore=0;
for (iRow=0;iRow<numberRows_;iRow++) {
int dropped=rowsDropped_[iRow];
// Move to int array
rowsDropped[iRow]=dropped;
if (!dropped) {
longDouble diagonal = diagonal_[iRow];
if (diagonal>largest2) {
diagonal_[iRow]=diagonal+perturbation;
} else {
diagonal_[iRow]=diagonal+perturbation;
rowsDropped[iRow]=2;
numberDroppedBefore++;
}
}
}
doubleParameters_[10]=CoinMax(1.0e-20,largest);
integerParameters_[20]=0;
doubleParameters_[3]=0.0;
doubleParameters_[4]=COIN_DBL_MAX;
integerParameters_[34]=0; // say all must be positive
#ifdef CHOL_COMPARE
if (numberRows_<200)
factorizePart3(rowsDropped);
#endif
factorizePart2(rowsDropped);
newDropped=integerParameters_[20]+numberDroppedBefore;
largest=doubleParameters_[3];
smallest=doubleParameters_[4];
if (model_->messageHandler()->logLevel()>1)
std::cout<<"Cholesky - largest "<<largest<<" smallest "<<smallest<<std::endl;
choleskyCondition_=largest/smallest;
//drop fresh makes some formADAT easier
if (newDropped||numberRowsDropped_) {
newDropped=0;
for (int i=0;i<numberRows_;i++) {
char dropped = rowsDropped[i];
rowsDropped_[i]=dropped;
if (dropped==2) {
//dropped this time
rowsDropped[newDropped++]=i;
rowsDropped_[i]=0;
}
}
numberRowsDropped_=newDropped;
newDropped=-(2+newDropped);
}
} else {
// KKT
CoinPackedMatrix * quadratic = NULL;
ClpQuadraticObjective * quadraticObj =
(dynamic_cast< ClpQuadraticObjective*>(model_->objectiveAsObject()));
if (quadraticObj)
quadratic = quadraticObj->quadraticObjective();
// matrix
int numberRowsModel = model_->numberRows();
int numberColumns = model_->numberColumns();
int numberTotal = numberColumns + numberRowsModel;
longDouble * work = sparseFactor_;
work--; // skip diagonal
int addOffset=numberRows_-1;
int iColumn;
if (!quadratic) {
for (iColumn=0;iColumn<numberColumns;iColumn++) {
longDouble value = diagonal[iColumn];
if (fabs(value)>1.0e-100) {
value = 1.0/value;
largest = CoinMax(largest,fabs(value));
diagonal_[iColumn] = -value;
CoinBigIndex start=columnStart[iColumn];
CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn];
for (CoinBigIndex j=start;j<end;j++) {
//choleskyRow_[numberElements]=row[j]+numberTotal;
//sparseFactor_[numberElements++]=element[j];
work[row[j]+numberTotal]=element[j];
largest = CoinMax(largest,fabs(element[j]));
}
} else {
diagonal_[iColumn] = -value;
}
addOffset--;
work += addOffset;
}
} else {
// Quadratic
const int * columnQuadratic = quadratic->getIndices();
const CoinBigIndex * columnQuadraticStart = quadratic->getVectorStarts();
const int * columnQuadraticLength = quadratic->getVectorLengths();
const double * quadraticElement = quadratic->getElements();
for (iColumn=0;iColumn<numberColumns;iColumn++) {
longDouble value = diagonal[iColumn];
CoinBigIndex j;
if (fabs(value)>1.0e-100) {
value = 1.0/value;
for (j=columnQuadraticStart[iColumn];
j<columnQuadraticStart[iColumn]+columnQuadraticLength[iColumn];j++) {
int jColumn = columnQuadratic[j];
if (jColumn>iColumn) {
work[jColumn]=-quadraticElement[j];
} else if (iColumn==jColumn) {
value += quadraticElement[j];
}
}
largest = CoinMax(largest,fabs(value));
diagonal_[iColumn] = -value;
CoinBigIndex start=columnStart[iColumn];
CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn];
for (j=start;j<end;j++) {
work[row[j]+numberTotal]=element[j];
largest = CoinMax(largest,fabs(element[j]));
}
} else {
value = 1.0e100;
diagonal_[iColumn] = -value;
}
addOffset--;
work += addOffset;
}
}
// slacks
for (iColumn=numberColumns;iColumn<numberTotal;iColumn++) {
longDouble value = diagonal[iColumn];
if (fabs(value)>1.0e-100) {
value = 1.0/value;
largest = CoinMax(largest,fabs(value));
} else {
value = 1.0e100;
}
diagonal_[iColumn] = -value;
work[iColumn-numberColumns+numberTotal]=-1.0;
addOffset--;
work += addOffset;
}
// Finish diagonal
for (iRow=0;iRow<numberRowsModel;iRow++) {
diagonal_[iRow+numberTotal]=delta2;
}
//check sizes
largest*=1.0e-20;
largest = CoinMin(largest,1.0e-11);
doubleParameters_[10]=CoinMax(1.0e-20,largest);
integerParameters_[20]=0;
doubleParameters_[3]=0.0;
doubleParameters_[4]=COIN_DBL_MAX;
// Set up LDL cutoff
integerParameters_[34]=numberTotal;
// KKT
int * rowsDropped2 = new int[numberRows_];
CoinZeroN(rowsDropped2,numberRows_);
#ifdef CHOL_COMPARE
if (numberRows_<200)
factorizePart3(rowsDropped2);
#endif
factorizePart2(rowsDropped2);
newDropped=integerParameters_[20];
largest=doubleParameters_[3];
smallest=doubleParameters_[4];
if (model_->messageHandler()->logLevel()>1)
std::cout<<"Cholesky - largest "<<largest<<" smallest "<<smallest<<std::endl;
choleskyCondition_=largest/smallest;
// Should save adjustments in ..R_
int n1=0,n2=0;
double * primalR = model_->primalR();
double * dualR = model_->dualR();
for (iRow=0;iRow<numberTotal;iRow++) {
if (rowsDropped2[iRow]) {
n1++;
//printf("row region1 %d dropped\n",iRow);
//rowsDropped_[iRow]=1;
rowsDropped_[iRow]=0;
primalR[iRow]=doubleParameters_[20];
} else {
rowsDropped_[iRow]=0;
primalR[iRow]=0.0;
}
}
for (;iRow<numberRows_;iRow++) {
if (rowsDropped2[iRow]) {
n2++;
//printf("row region2 %d dropped\n",iRow);
//rowsDropped_[iRow]=1;
rowsDropped_[iRow]=0;
dualR[iRow-numberTotal]=doubleParameters_[34];
} else {
rowsDropped_[iRow]=0;
dualR[iRow-numberTotal]=0.0;
}
}
}
return 0;
}
/* Factorize - filling in rowsDropped and returning number dropped */
void
ClpCholeskyDense::factorizePart3( int * rowsDropped)
{
int iColumn;
longDouble * xx = sparseFactor_;
longDouble * yy = diagonal_;
diagonal_ = sparseFactor_ + 40000;
sparseFactor_=diagonal_ + numberRows_;
//memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));
memcpy(sparseFactor_,xx,40000*sizeof(double));
memcpy(diagonal_,yy,numberRows_*sizeof(double));
int numberDropped=0;
double largest=0.0;
double smallest=COIN_DBL_MAX;
double dropValue = doubleParameters_[10];
int firstPositive=integerParameters_[34];
longDouble * work = sparseFactor_;
// Allow for triangular
int addOffset=numberRows_-1;
work--;
for (iColumn=0;iColumn<numberRows_;iColumn++) {
int iRow;
int addOffsetNow = numberRows_-1;;
longDouble * workNow=sparseFactor_-1+iColumn;
double diagonalValue = diagonal_[iColumn];
for (iRow=0;iRow<iColumn;iRow++) {
double aj = *workNow;
addOffsetNow--;
workNow += addOffsetNow;
double multiplier = workDouble_[iRow];
diagonalValue -= aj*aj*multiplier;
}
bool dropColumn=false;
if (iColumn<firstPositive) {
// must be negative
if (diagonalValue<=-dropValue) {
smallest = CoinMin(smallest,-diagonalValue);
largest = CoinMax(largest,-diagonalValue);
workDouble_[iColumn]=diagonalValue;
diagonalValue = 1.0/diagonalValue;
} else {
dropColumn=true;
workDouble_[iColumn]=-1.0e100;
diagonalValue=0.0;
integerParameters_[20]++;
}
} else {
// must be positive
if (diagonalValue>=dropValue) {
smallest = CoinMin(smallest,diagonalValue);
largest = CoinMax(largest,diagonalValue);
workDouble_[iColumn]=diagonalValue;
diagonalValue = 1.0/diagonalValue;
} else {
dropColumn=true;
workDouble_[iColumn]=1.0e100;
diagonalValue=0.0;
integerParameters_[20]++;
}
}
if (!dropColumn) {
diagonal_[iColumn]=diagonalValue;
for (iRow=iColumn+1;iRow<numberRows_;iRow++) {
double value = work[iRow];
workNow = sparseFactor_-1;
int addOffsetNow = numberRows_-1;;
for (int jColumn=0;jColumn<iColumn;jColumn++) {
double aj = workNow[iColumn];
double multiplier = workDouble_[jColumn];
double ai = workNow[iRow];
addOffsetNow--;
workNow += addOffsetNow;
value -= aj*ai*multiplier;
}
work[iRow] = value*diagonalValue;
}
} else {
// drop column
rowsDropped[iColumn]=2;
numberDropped++;
diagonal_[iColumn]=0.0;
for (iRow=iColumn+1;iRow<numberRows_;iRow++) {
work[iRow] = 0.0;
}
}
addOffset--;
work += addOffset;
}
doubleParameters_[3]=largest;
doubleParameters_[4]=smallest;
integerParameters_[20]=numberDropped;
sparseFactor_=xx;
diagonal_=yy;
}
//#define POS_DEBUG
#ifdef POS_DEBUG
static int counter=0;
int ClpCholeskyDense::bNumber(const longDouble * array, int &iRow, int &iCol)
{
int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
int k = array-a;
assert ((k%BLOCKSQ)==0);
iCol=0;
int kk=k>>BLOCKSQSHIFT;
//printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);
k=kk;
while (k>=numberBlocks) {
iCol++;
k -= numberBlocks;
numberBlocks--;
}
iRow = iCol+k;
counter++;
if(counter>100000)
exit(77);
return kk;
}
#endif
/* Factorize - filling in rowsDropped and returning number dropped */
void
ClpCholeskyDense::factorizePart2( int * rowsDropped)
{
int iColumn;
//longDouble * xx = sparseFactor_;
//longDouble * yy = diagonal_;
//diagonal_ = sparseFactor_ + 40000;
//sparseFactor_=diagonal_ + numberRows_;
//memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));
//memcpy(sparseFactor_,xx,40000*sizeof(double));
//memcpy(diagonal_,yy,numberRows_*sizeof(double));
int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
// later align on boundary
longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
int n=numberRows_;
int nRound = numberRows_&(~(BLOCK-1));
// adjust if exact
if (nRound==n)
nRound -= BLOCK;
int sizeLastBlock=n-nRound;
int get = n*(n-1)/2; // ? as no diagonal
int block = numberBlocks*(numberBlocks+1)/2;
int ifOdd;
int rowLast;
if (sizeLastBlock!=BLOCK) {
longDouble * aa = &a[(block-1)*BLOCKSQ];
rowLast=nRound-1;
ifOdd=1;
int put = BLOCKSQ;
// do last separately
put -= (BLOCK-sizeLastBlock)*(BLOCK+1);
for (iColumn=numberRows_-1;iColumn>=nRound;iColumn--) {
int put2 = put;
put -= BLOCK;
for (int iRow=numberRows_-1;iRow>iColumn;iRow--) {
aa[--put2] = sparseFactor_[--get];
assert (aa+put2>=sparseFactor_+get);
}
// save diagonal as well
aa[--put2]=diagonal_[iColumn];
}
n=nRound;
block--;
} else {
// exact fit
rowLast=numberRows_-1;
ifOdd=0;
}
// Now main loop
int nBlock=0;
for (;n>0;n-=BLOCK) {
longDouble * aa = &a[(block-1)*BLOCKSQ];
longDouble * aaLast=NULL;
int put = BLOCKSQ;
int putLast=0;
// see if we have small block
if (ifOdd) {
aaLast = &a[(block-1)*BLOCKSQ];
aa = aaLast-BLOCKSQ;
putLast = BLOCKSQ-BLOCK+sizeLastBlock;
}
for (iColumn=n-1;iColumn>=n-BLOCK;iColumn--) {
if (aaLast) {
// last bit
for (int iRow=numberRows_-1;iRow>rowLast;iRow--) {
aaLast[--putLast] = sparseFactor_[--get];
assert (aaLast+putLast>=sparseFactor_+get);
}
putLast -= BLOCK-sizeLastBlock;
}
longDouble * aPut = aa;
int j=rowLast;
for (int jBlock=0;jBlock<=nBlock;jBlock++) {
int put2 = put;
int last = CoinMax(j-BLOCK,iColumn);
for (int iRow=j;iRow>last;iRow--) {
aPut[--put2] = sparseFactor_[--get];
assert (aPut+put2>=sparseFactor_+get);
}
if (j-BLOCK<iColumn) {
// save diagonal as well
aPut[--put2]=diagonal_[iColumn];
}
j -= BLOCK;
aPut -=BLOCKSQ;
}
put -= BLOCK;
}
nBlock++;
block -= nBlock+ifOdd;
}
factor(a,numberRows_,numberBlocks,
diagonal_,workDouble_,rowsDropped);
//sparseFactor_=xx;
//diagonal_=yy;
}
// Non leaf recursive factor
void
ClpCholeskyDense::factor(longDouble * a, int n, int numberBlocks,
longDouble * diagonal, longDouble * work, int * rowsDropped)
{
if (n<=BLOCK) {
factorLeaf(a,n,diagonal,work,rowsDropped);
} else {
int nb=number_blocks((n+1)>>1);
int nThis=number_rows(nb);
longDouble * aother;
int nLeft=n-nThis;
int nintri=(nb*(nb+1))>>1;
int nbelow=(numberBlocks-nb)*nb;
factor(a,nThis,numberBlocks,diagonal,work,rowsDropped);
triRec(a,nThis,a+number_entries(nb),diagonal,work,nLeft,nb,0,numberBlocks);
aother=a+number_entries(nintri+nbelow);
recTri(a+number_entries(nb),nLeft,nThis,nb,0,aother,diagonal,work,numberBlocks);
factor(aother,nLeft,
numberBlocks-nb,diagonal+nThis,work+nThis,rowsDropped);
}
}
// Non leaf recursive triangle rectangle update
void
ClpCholeskyDense::triRec(longDouble * aTri, int nThis, longDouble * aUnder,
longDouble * diagonal, longDouble * work,
int nLeft, int iBlock, int jBlock,
int numberBlocks)
{
if (nThis<=BLOCK&&nLeft<=BLOCK) {
triRecLeaf(aTri,aUnder,diagonal,work,nLeft);
} else if (nThis<nLeft) {
int nb=number_blocks((nLeft+1)>>1);
int nLeft2=number_rows(nb);
triRec(aTri,nThis,aUnder,diagonal,work,nLeft2,iBlock,jBlock,numberBlocks);
triRec(aTri,nThis,aUnder+number_entries(nb),diagonal,work,nLeft-nLeft2,
iBlock+nb,jBlock,numberBlocks);
} else {
int nb=number_blocks((nThis+1)>>1);
int nThis2=number_rows(nb);
longDouble * aother;
int kBlock=jBlock+nb;
int i;
int nintri=(nb*(nb+1))>>1;
int nbelow=(numberBlocks-nb)*nb;
triRec(aTri,nThis2,aUnder,diagonal,work,nLeft,iBlock,jBlock,numberBlocks);
/* and rectangular update */
i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)-
(numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1;
aother=aUnder+number_entries(i);
recRec(aTri+number_entries(nb),nThis-nThis2,nLeft,nThis2,aUnder,aother,
diagonal,work,iBlock,kBlock,jBlock,numberBlocks);
triRec(aTri+number_entries(nintri+nbelow),nThis-nThis2,aother,diagonal+nThis2,
work+nThis2,nLeft,
iBlock-nb,kBlock-nb,numberBlocks-nb);
}
}
// Non leaf recursive rectangle triangle update
void
ClpCholeskyDense::recTri(longDouble * aUnder, int nTri, int nDo,
int iBlock, int jBlock,longDouble * aTri,
longDouble * diagonal, longDouble * work,
int numberBlocks)
{
if (nTri<=BLOCK&&nDo<=BLOCK) {
recTriLeaf(aUnder,aTri,diagonal,work,nTri);
} else if (nTri<nDo) {
int nb=number_blocks((nDo+1)>>1);
int nDo2=number_rows(nb);
longDouble * aother;
int i;
recTri(aUnder,nTri,nDo2,iBlock,jBlock,aTri,diagonal,work,numberBlocks);
i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)-
(numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1;
aother=aUnder+number_entries(i);
recTri(aother,nTri,nDo-nDo2,iBlock-nb,jBlock,aTri,diagonal+nDo2,
work+nDo2,numberBlocks-nb);
} else {
int nb=number_blocks((nTri+1)>>1);
int nTri2=number_rows(nb);
longDouble * aother;
int i;
recTri(aUnder,nTri2,nDo,iBlock,jBlock,aTri,diagonal,work,numberBlocks);
/* and rectangular update */
i=((numberBlocks-iBlock)*(numberBlocks-iBlock+1)-
(numberBlocks-iBlock-nb)*(numberBlocks-iBlock-nb+1))>>1;
aother=aTri+number_entries(nb);
recRec(aUnder,nTri2,nTri-nTri2,nDo,aUnder+number_entries(nb),aother,
diagonal,work,iBlock+nb,iBlock,jBlock,numberBlocks);
recTri(aUnder+number_entries(nb),nTri-nTri2,nDo,iBlock+nb,jBlock,
aTri+number_entries(i),diagonal,work,numberBlocks);
}
}
#ifdef CHOL_USING_BLAS
// using simple blas interface
extern "C"
{
/** BLAS Fortran subroutine DGEMM. */
void F77_FUNC(dgemm,DGEMM)(char* transa, char* transb,
ipfint *m, ipfint *n, ipfint *k,
const double *alpha, const double *a, ipfint *lda,
const double *b, ipfint *ldb, const double *beta,
double *c, ipfint *ldc,
int transa_len, int transb_len);
}
#endif
/* Non leaf recursive rectangle rectangle update,
nUnder is number of rows in iBlock,
nUnderK is number of rows in kBlock
*/
void
ClpCholeskyDense::recRec(longDouble * above, int nUnder, int nUnderK,
int nDo, longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work,
int kBlock,int iBlock, int jBlock,
int numberBlocks)
{
#ifdef CHOL_USING_BLAS
recRecLeaf(above , aUnder , aOther, diagonal,work, nUnderK);
void
ClpCholeskyDense::recRecLeaf(longDouble * above,
longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work,
int nUnder)
aa = aOther-BLOCK;
for (j = 0; j < BLOCK; j ++) {
aa+=BLOCK;
for (i=0;i< nUnderK;i++) {
longDouble t00=aa[i+0*BLOCK];
for (k=0;k<BLOCK;k++) {
longDouble a00=aUnder[i+k*BLOCK]*work[k];
t00 -= a00 * above[j + k * BLOCK];
}
aa[i]=t00;
}
return;
#endif
if (nDo<=BLOCK&&nUnder<=BLOCK&&nUnderK<=BLOCK) {
assert (nDo==BLOCK&&nUnder==BLOCK);
recRecLeaf(above , aUnder , aOther, diagonal,work, nUnderK);
} else if (nDo<=nUnderK&&nUnder<=nUnderK) {
int nb=number_blocks((nUnderK+1)>>1);
int nUnder2=number_rows(nb);
recRec(above,nUnder,nUnder2,nDo,aUnder,aOther,diagonal,work,
kBlock,iBlock,jBlock,numberBlocks);
recRec(above,nUnder,nUnderK-nUnder2,nDo,aUnder+number_entries(nb),
aOther+number_entries(nb),diagonal,work,kBlock+nb,iBlock,jBlock,numberBlocks);
} else if (nUnderK<=nDo&&nUnder<=nDo) {
int nb=number_blocks((nDo+1)>>1);
int nDo2=number_rows(nb);
int i;
recRec(above,nUnder,nUnderK,nDo2,aUnder,aOther,diagonal,work,
kBlock,iBlock,jBlock,numberBlocks);
i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)-
(numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1;
recRec(above+number_entries(i),nUnder,nUnderK,nDo-nDo2,
aUnder+number_entries(i),
aOther,diagonal+nDo2,work+nDo2,
kBlock-nb,iBlock-nb,jBlock,numberBlocks-nb);
} else {
int nb=number_blocks((nUnder+1)>>1);
int nUnder2=number_rows(nb);
int i;
recRec(above,nUnder2,nUnderK,nDo,aUnder,aOther,diagonal,work,
kBlock,iBlock,jBlock,numberBlocks);
i=((numberBlocks-iBlock)*(numberBlocks-iBlock-1)-
(numberBlocks-iBlock-nb)*(numberBlocks-iBlock-nb-1))>>1;
recRec(above+number_entries(nb),nUnder-nUnder2,nUnderK,nDo,aUnder,
aOther+number_entries(i),diagonal,work,kBlock,iBlock+nb,jBlock,numberBlocks);
}
}
// Leaf recursive factor
void
ClpCholeskyDense::factorLeaf(longDouble * a, int n,
longDouble * diagonal, longDouble * work, int * rowsDropped)
{
longDouble largest=doubleParameters_[3];
longDouble smallest=doubleParameters_[4];
double dropValue = doubleParameters_[10];
int firstPositive=integerParameters_[34];
int rowOffset=diagonal-diagonal_;
int numberDropped=0;
int i, j, k;
longDouble t00,temp1;
longDouble * aa;
aa = a-BLOCK;
for (j = 0; j < n; j ++) {
aa+=BLOCK;
t00 = aa[j];
for (k = 0; k < j; ++k) {
longDouble multiplier = work[k];
t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier;
}
bool dropColumn=false;
longDouble useT00=t00;
if (j+rowOffset<firstPositive) {
// must be negative
if (t00<=-dropValue) {
smallest = CoinMin(smallest,-t00);
largest = CoinMax(largest,-t00);
//aa[j]=t00;
t00 = 1.0/t00;
} else {
dropColumn=true;
//aa[j]=-1.0e100;
useT00=-1.0e-100;
t00=0.0;
integerParameters_[20]++;
}
} else {
// must be positive
if (t00>=dropValue) {
smallest = CoinMin(smallest,t00);
largest = CoinMax(largest,t00);
//aa[j]=t00;
t00 = 1.0/t00;
} else {
dropColumn=true;
//aa[j]=1.0e100;
useT00=1.0e-100;
t00=0.0;
integerParameters_[20]++;
}
}
if (!dropColumn) {
diagonal[j]=t00;
work[j]=useT00;
temp1 = t00;
for (i=j+1;i<n;i++) {
t00=aa[i];
for (k = 0; k < j; ++k) {
longDouble multiplier = work[k];
t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier;
}
aa[i]=t00*temp1;
}
} else {
// drop column
rowsDropped[j+rowOffset]=2;
numberDropped++;
diagonal[j]=0.0;
//aa[j]=1.0e100;
work[j]=1.0e100;
for (i=j+1;i<n;i++) {
aa[i]=0.0;
}
}
}
doubleParameters_[3]=largest;
doubleParameters_[4]=smallest;
integerParameters_[20]+=numberDropped;
}
// Leaf recursive triangle rectangle update
void
ClpCholeskyDense::triRecLeaf(longDouble * aTri, longDouble * aUnder, longDouble * diagonal, longDouble * work,
int nUnder)
{
#ifdef POS_DEBUG
int iru,icu;
int iu=bNumber(aUnder,iru,icu);
int irt,ict;
int it=bNumber(aTri,irt,ict);
//printf("%d %d\n",iu,it);
printf("trirecleaf under (%d,%d), tri (%d,%d)\n",
iru,icu,irt,ict);
assert (diagonal==diagonal_+ict*BLOCK);
#endif
int j;
longDouble * aa;
#if BLOCK==16
if (nUnder==BLOCK) {
aa = aTri-2*BLOCK;
for (j = 0; j < BLOCK; j +=2) {
aa+=2*BLOCK;
longDouble temp0 = diagonal[j];
longDouble temp1 = diagonal[j+1];
for (int i=0;i<BLOCK;i+=2) {
longDouble t00=aUnder[i+j*BLOCK];
longDouble t10=aUnder[i+ BLOCK + j*BLOCK];
longDouble t01=aUnder[i+1+j*BLOCK];
longDouble t11=aUnder[i+1+ BLOCK + j*BLOCK];
for (int k = 0; k < j; ++k) {
longDouble multiplier=work[k];
longDouble au0 = aUnder[i + k * BLOCK]*multiplier;
longDouble au1 = aUnder[i + 1 + k * BLOCK]*multiplier;
longDouble at0 = aTri[j + k * BLOCK];
longDouble at1 = aTri[j + 1 + k * BLOCK];
t00 -= au0 * at0;
t10 -= au0 * at1;
t01 -= au1 * at0;
t11 -= au1 * at1;
}
t00 *= temp0;
longDouble at1 = aTri[j + 1 + j*BLOCK]*work[j];
t10 -= t00 * at1;
t01 *= temp0;
t11 -= t01 * at1;
aUnder[i+j*BLOCK]=t00;
aUnder[i+1+j*BLOCK]=t01;
aUnder[i+ BLOCK + j*BLOCK]=t10*temp1;
aUnder[i+1+ BLOCK + j*BLOCK]=t11*temp1;
}
}
} else {
#endif
aa = aTri-BLOCK;
for (j = 0; j < BLOCK; j ++) {
aa+=BLOCK;
longDouble temp1 = diagonal[j];
for (int i=0;i<nUnder;i++) {
longDouble t00=aUnder[i+j*BLOCK];
for (int k = 0; k < j; ++k) {
longDouble multiplier=work[k];
t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier;
}
aUnder[i+j*BLOCK]=t00*temp1;
}
}
#if BLOCK==16
}
#endif
}
// Leaf recursive rectangle triangle update
void ClpCholeskyDense::recTriLeaf(longDouble * aUnder, longDouble * aTri,
longDouble * diagonal, longDouble * work, int nUnder)
{
#ifdef POS_DEBUG
int iru,icu;
int iu=bNumber(aUnder,iru,icu);
int irt,ict;
int it=bNumber(aTri,irt,ict);
//printf("%d %d\n",iu,it);
printf("rectrileaf under (%d,%d), tri (%d,%d)\n",
iru,icu,irt,ict);
assert (diagonal==diagonal_+icu*BLOCK);
#endif
int i, j, k;
longDouble t00;
longDouble * aa;
#if BLOCK==16
if (nUnder==BLOCK) {
longDouble * aUnder2 = aUnder-2;
aa = aTri-2*BLOCK;
for (j = 0; j < BLOCK; j +=2) {
longDouble t00,t01,t10,t11;
aa+=2*BLOCK;
aUnder2+=2;
t00=aa[j];
t01=aa[j+1];
t10=aa[j+1+BLOCK];
for (k = 0; k < BLOCK; ++k) {
longDouble multiplier = work[k];
longDouble a0 = aUnder2[k * BLOCK];
longDouble a1 = aUnder2[1 + k * BLOCK];
longDouble x0 = a0 * multiplier;
longDouble x1 = a1 * multiplier;
t00 -= a0 * x0;
t01 -= a1 * x0;
t10 -= a1 * x1;
}
aa[j]=t00;
aa[j+1]=t01;
aa[j+1+BLOCK]=t10;
for (i=j+2;i< BLOCK;i+=2) {
t00=aa[i];
t01=aa[i+BLOCK];
t10=aa[i+1];
t11=aa[i+1+BLOCK];
for (k = 0; k < BLOCK; ++k) {
longDouble multiplier = work[k];
longDouble a0 = aUnder2[k * BLOCK]*multiplier;
longDouble a1 = aUnder2[1 + k * BLOCK]*multiplier;
t00 -= aUnder[i + k * BLOCK] * a0;
t01 -= aUnder[i + k * BLOCK] * a1;
t10 -= aUnder[i + 1 + k * BLOCK] * a0;
t11 -= aUnder[i + 1 + k * BLOCK] * a1;
}
aa[i]=t00;
aa[i+BLOCK]=t01;
aa[i+1]=t10;
aa[i+1+BLOCK]=t11;
}
}
} else {
#endif
aa = aTri-BLOCK;
for (j = 0; j < nUnder; j ++) {
aa+=BLOCK;
for (i=j;i< nUnder;i++) {
t00=aa[i];
for (k = 0; k < BLOCK; ++k) {
longDouble multiplier = work[k];
t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK]*multiplier;
}
aa[i]=t00;
}
}
#if BLOCK==16
}
#endif
}
/* Leaf recursive rectangle rectangle update,
nUnder is number of rows in iBlock,
nUnderK is number of rows in kBlock
*/
void
ClpCholeskyDense::recRecLeaf(longDouble * above,
longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work,
int nUnder)
{
#ifdef POS_DEBUG
int ira,ica;
int ia=bNumber(above,ira,ica);
int iru,icu;
int iu=bNumber(aUnder,iru,icu);
int iro,ico;
int io=bNumber(aOther,iro,ico);
//printf("%d %d %d\n",ia,iu,io);
printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n",
ira,ica,iru,icu,iro,ico);
assert (diagonal==diagonal_+ica*BLOCK);
#endif
int i, j, k;
longDouble * aa;
#if BLOCK==16
aa = aOther-4*BLOCK;
if (nUnder==BLOCK) {
//#define INTEL
#ifdef INTEL
aa+=2*BLOCK;
for (j = 0; j < BLOCK; j +=2) {
aa+=2*BLOCK;
for (i=0;i< BLOCK;i+=2) {
longDouble t00=aa[i+0*BLOCK];
longDouble t10=aa[i+1*BLOCK];
longDouble t01=aa[i+1+0*BLOCK];
longDouble t11=aa[i+1+1*BLOCK];
for (k=0;k<BLOCK;k++) {
longDouble multiplier = work[k];
longDouble a00=aUnder[i+k*BLOCK]*multiplier;
longDouble a01=aUnder[i+1+k*BLOCK]*multiplier;
t00 -= a00 * above[j + 0 + k * BLOCK];
t10 -= a00 * above[j + 1 + k * BLOCK];
t01 -= a01 * above[j + 0 + k * BLOCK];
t11 -= a01 * above[j + 1 + k * BLOCK];
}
aa[i+0*BLOCK]=t00;
aa[i+1*BLOCK]=t10;
aa[i+1+0*BLOCK]=t01;
aa[i+1+1*BLOCK]=t11;
}
}
#else
for (j = 0; j < BLOCK; j +=4) {
aa+=4*BLOCK;
for (i=0;i< BLOCK;i+=4) {
longDouble t00=aa[i+0+0*BLOCK];
longDouble t10=aa[i+0+1*BLOCK];
longDouble t20=aa[i+0+2*BLOCK];
longDouble t30=aa[i+0+3*BLOCK];
longDouble t01=aa[i+1+0*BLOCK];
longDouble t11=aa[i+1+1*BLOCK];
longDouble t21=aa[i+1+2*BLOCK];
longDouble t31=aa[i+1+3*BLOCK];
longDouble t02=aa[i+2+0*BLOCK];
longDouble t12=aa[i+2+1*BLOCK];
longDouble t22=aa[i+2+2*BLOCK];
longDouble t32=aa[i+2+3*BLOCK];
longDouble t03=aa[i+3+0*BLOCK];
longDouble t13=aa[i+3+1*BLOCK];
longDouble t23=aa[i+3+2*BLOCK];
longDouble t33=aa[i+3+3*BLOCK];
for (k=0;k<BLOCK;k++) {
longDouble multiplier = work[k];
longDouble a00=aUnder[i+0+k*BLOCK]*multiplier;
longDouble a01=aUnder[i+1+k*BLOCK]*multiplier;
longDouble a02=aUnder[i+2+k*BLOCK]*multiplier;
longDouble a03=aUnder[i+3+k*BLOCK]*multiplier;
t00 -= a00 * above[j + 0 + k * BLOCK];
t10 -= a00 * above[j + 1 + k * BLOCK];
t20 -= a00 * above[j + 2 + k * BLOCK];
t30 -= a00 * above[j + 3 + k * BLOCK];
t01 -= a01 * above[j + 0 + k * BLOCK];
t11 -= a01 * above[j + 1 + k * BLOCK];
t21 -= a01 * above[j + 2 + k * BLOCK];
t31 -= a01 * above[j + 3 + k * BLOCK];
t02 -= a02 * above[j + 0 + k * BLOCK];
t12 -= a02 * above[j + 1 + k * BLOCK];
t22 -= a02 * above[j + 2 + k * BLOCK];
t32 -= a02 * above[j + 3 + k * BLOCK];
t03 -= a03 * above[j + 0 + k * BLOCK];
t13 -= a03 * above[j + 1 + k * BLOCK];
t23 -= a03 * above[j + 2 + k * BLOCK];
t33 -= a03 * above[j + 3 + k * BLOCK];
}
aa[i+0+0*BLOCK]=t00;
aa[i+0+1*BLOCK]=t10;
aa[i+0+2*BLOCK]=t20;
aa[i+0+3*BLOCK]=t30;
aa[i+1+0*BLOCK]=t01;
aa[i+1+1*BLOCK]=t11;
aa[i+1+2*BLOCK]=t21;
aa[i+1+3*BLOCK]=t31;
aa[i+2+0*BLOCK]=t02;
aa[i+2+1*BLOCK]=t12;
aa[i+2+2*BLOCK]=t22;
aa[i+2+3*BLOCK]=t32;
aa[i+3+0*BLOCK]=t03;
aa[i+3+1*BLOCK]=t13;
aa[i+3+2*BLOCK]=t23;
aa[i+3+3*BLOCK]=t33;
}
}
#endif
} else {
int odd=nUnder&1;
int n=nUnder-odd;
for (j = 0; j < BLOCK; j +=4) {
aa+=4*BLOCK;
for (i=0;i< n;i+=2) {
longDouble t00=aa[i+0*BLOCK];
longDouble t10=aa[i+1*BLOCK];
longDouble t20=aa[i+2*BLOCK];
longDouble t30=aa[i+3*BLOCK];
longDouble t01=aa[i+1+0*BLOCK];
longDouble t11=aa[i+1+1*BLOCK];
longDouble t21=aa[i+1+2*BLOCK];
longDouble t31=aa[i+1+3*BLOCK];
for (k=0;k<BLOCK;k++) {
longDouble multiplier = work[k];
longDouble a00=aUnder[i+k*BLOCK]*multiplier;
longDouble a01=aUnder[i+1+k*BLOCK]*multiplier;
t00 -= a00 * above[j + 0 + k * BLOCK];
t10 -= a00 * above[j + 1 + k * BLOCK];
t20 -= a00 * above[j + 2 + k * BLOCK];
t30 -= a00 * above[j + 3 + k * BLOCK];
t01 -= a01 * above[j + 0 + k * BLOCK];
t11 -= a01 * above[j + 1 + k * BLOCK];
t21 -= a01 * above[j + 2 + k * BLOCK];
t31 -= a01 * above[j + 3 + k * BLOCK];
}
aa[i+0*BLOCK]=t00;
aa[i+1*BLOCK]=t10;
aa[i+2*BLOCK]=t20;
aa[i+3*BLOCK]=t30;
aa[i+1+0*BLOCK]=t01;
aa[i+1+1*BLOCK]=t11;
aa[i+1+2*BLOCK]=t21;
aa[i+1+3*BLOCK]=t31;
}
if (odd) {
longDouble t0=aa[n+0*BLOCK];
longDouble t1=aa[n+1*BLOCK];
longDouble t2=aa[n+2*BLOCK];
longDouble t3=aa[n+3*BLOCK];
longDouble a0;
for (k=0;k<BLOCK;k++) {
a0=aUnder[n+k*BLOCK]*work[k];
t0 -= a0 * above[j + 0 + k * BLOCK];
t1 -= a0 * above[j + 1 + k * BLOCK];
t2 -= a0 * above[j + 2 + k * BLOCK];
t3 -= a0 * above[j + 3 + k * BLOCK];
}
aa[n+0*BLOCK]=t0;
aa[n+1*BLOCK]=t1;
aa[n+2*BLOCK]=t2;
aa[n+3*BLOCK]=t3;
}
}
}
#else
aa = aOther-BLOCK;
for (j = 0; j < BLOCK; j ++) {
aa+=BLOCK;
for (i=0;i< nUnder;i++) {
longDouble t00=aa[i+0*BLOCK];
for (k=0;k<BLOCK;k++) {
longDouble a00=aUnder[i+k*BLOCK]*work[k];
t00 -= a00 * above[j + k * BLOCK];
}
aa[i]=t00;
}
}
#endif
}
/* Uses factorization to solve. */
void
ClpCholeskyDense::solve (double * region)
{
#ifdef CHOL_COMPARE
double * region2=NULL;
if (numberRows_<200) {
longDouble * xx = sparseFactor_;
longDouble * yy = diagonal_;
diagonal_ = sparseFactor_ + 40000;
sparseFactor_=diagonal_ + numberRows_;
region2 = ClpCopyOfArray(region,numberRows_);
int iRow,iColumn;
int addOffset=numberRows_-1;
longDouble * work = sparseFactor_-1;
for (iColumn=0;iColumn<numberRows_;iColumn++) {
double value = region2[iColumn];
for (iRow=iColumn+1;iRow<numberRows_;iRow++)
region2[iRow] -= value*work[iRow];
addOffset--;
work += addOffset;
}
for (iColumn=numberRows_-1;iColumn>=0;iColumn--) {
double value = region2[iColumn]*diagonal_[iColumn];
work -= addOffset;
addOffset++;
for (iRow=iColumn+1;iRow<numberRows_;iRow++)
value -= region2[iRow]*work[iRow];
region2[iColumn]=value;
}
sparseFactor_=xx;
diagonal_=yy;
}
#endif
//longDouble * xx = sparseFactor_;
//longDouble * yy = diagonal_;
//diagonal_ = sparseFactor_ + 40000;
//sparseFactor_=diagonal_ + numberRows_;
int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
// later align on boundary
longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
int iBlock;
longDouble * aa = a;
int iColumn;
for (iBlock=0;iBlock<numberBlocks;iBlock++) {
int nChunk;
int jBlock;
int iDo = iBlock*BLOCK;
int base=iDo;
if (iDo+BLOCK>numberRows_) {
nChunk=numberRows_-iDo;
} else {
nChunk=BLOCK;
}
solveF1(aa,nChunk,region+iDo);
for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
base+=BLOCK;
aa+=BLOCKSQ;
if (base+BLOCK>numberRows_) {
nChunk=numberRows_-base;
} else {
nChunk=BLOCK;
}
solveF2(aa,nChunk,region+iDo,region+base);
}
aa+=BLOCKSQ;
}
// do diagonal outside
for (iColumn=0;iColumn<numberRows_;iColumn++)
region[iColumn] *= diagonal_[iColumn];
int offset=((numberBlocks*(numberBlocks+1))>>1);
aa = a+number_entries(offset-1);
int lBase=(numberBlocks-1)*BLOCK;
for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) {
int nChunk;
int jBlock;
int triBase=iBlock*BLOCK;
int iBase=lBase;
for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
if (iBase+BLOCK>numberRows_) {
nChunk=numberRows_-iBase;
} else {
nChunk=BLOCK;
}
solveB2(aa,nChunk,region+triBase,region+iBase);
iBase-=BLOCK;
aa-=BLOCKSQ;
}
if (triBase+BLOCK>numberRows_) {
nChunk=numberRows_-triBase;
} else {
nChunk=BLOCK;
}
solveB1(aa,nChunk,region+triBase);
aa-=BLOCKSQ;
}
#ifdef CHOL_COMPARE
if (numberRows_<200) {
for (int i=0;i<numberRows_;i++) {
assert(fabs(region[i]-region2[i])<1.0e-3);
}
delete [] region2;
}
#endif
}
// Forward part of solve 1
void
ClpCholeskyDense::solveF1(longDouble * a,int n,double * region)
{
int j, k;
longDouble t00;
for (j = 0; j < n; j ++) {
t00 = region[j];
for (k = 0; k < j; ++k) {
t00 -= region[k] * a[j + k * BLOCK];
}
//t00*=a[j + j * BLOCK];
region[j] = t00;
}
}
// Forward part of solve 2
void
ClpCholeskyDense::solveF2(longDouble * a,int n,double * region, double * region2)
{
int j, k;
#if BLOCK==16
if (n==BLOCK) {
for (k = 0; k < BLOCK; k+=4) {
longDouble t0 = region2[0];
longDouble t1 = region2[1];
longDouble t2 = region2[2];
longDouble t3 = region2[3];
t0 -= region[0] * a[0 + 0 * BLOCK];
t1 -= region[0] * a[1 + 0 * BLOCK];
t2 -= region[0] * a[2 + 0 * BLOCK];
t3 -= region[0] * a[3 + 0 * BLOCK];
t0 -= region[1] * a[0 + 1 * BLOCK];
t1 -= region[1] * a[1 + 1 * BLOCK];
t2 -= region[1] * a[2 + 1 * BLOCK];
t3 -= region[1] * a[3 + 1 * BLOCK];
t0 -= region[2] * a[0 + 2 * BLOCK];
t1 -= region[2] * a[1 + 2 * BLOCK];
t2 -= region[2] * a[2 + 2 * BLOCK];
t3 -= region[2] * a[3 + 2 * BLOCK];
t0 -= region[3] * a[0 + 3 * BLOCK];
t1 -= region[3] * a[1 + 3 * BLOCK];
t2 -= region[3] * a[2 + 3 * BLOCK];
t3 -= region[3] * a[3 + 3 * BLOCK];
t0 -= region[4] * a[0 + 4 * BLOCK];
t1 -= region[4] * a[1 + 4 * BLOCK];
t2 -= region[4] * a[2 + 4 * BLOCK];
t3 -= region[4] * a[3 + 4 * BLOCK];
t0 -= region[5] * a[0 + 5 * BLOCK];
t1 -= region[5] * a[1 + 5 * BLOCK];
t2 -= region[5] * a[2 + 5 * BLOCK];
t3 -= region[5] * a[3 + 5 * BLOCK];
t0 -= region[6] * a[0 + 6 * BLOCK];
t1 -= region[6] * a[1 + 6 * BLOCK];
t2 -= region[6] * a[2 + 6 * BLOCK];
t3 -= region[6] * a[3 + 6 * BLOCK];
t0 -= region[7] * a[0 + 7 * BLOCK];
t1 -= region[7] * a[1 + 7 * BLOCK];
t2 -= region[7] * a[2 + 7 * BLOCK];
t3 -= region[7] * a[3 + 7 * BLOCK];
#if BLOCK>8
t0 -= region[8] * a[0 + 8 * BLOCK];
t1 -= region[8] * a[1 + 8 * BLOCK];
t2 -= region[8] * a[2 + 8 * BLOCK];
t3 -= region[8] * a[3 + 8 * BLOCK];
t0 -= region[9] * a[0 + 9 * BLOCK];
t1 -= region[9] * a[1 + 9 * BLOCK];
t2 -= region[9] * a[2 + 9 * BLOCK];
t3 -= region[9] * a[3 + 9 * BLOCK];
t0 -= region[10] * a[0 + 10 * BLOCK];
t1 -= region[10] * a[1 + 10 * BLOCK];
t2 -= region[10] * a[2 + 10 * BLOCK];
t3 -= region[10] * a[3 + 10 * BLOCK];
t0 -= region[11] * a[0 + 11 * BLOCK];
t1 -= region[11] * a[1 + 11 * BLOCK];
t2 -= region[11] * a[2 + 11 * BLOCK];
t3 -= region[11] * a[3 + 11 * BLOCK];
t0 -= region[12] * a[0 + 12 * BLOCK];
t1 -= region[12] * a[1 + 12 * BLOCK];
t2 -= region[12] * a[2 + 12 * BLOCK];
t3 -= region[12] * a[3 + 12 * BLOCK];
t0 -= region[13] * a[0 + 13 * BLOCK];
t1 -= region[13] * a[1 + 13 * BLOCK];
t2 -= region[13] * a[2 + 13 * BLOCK];
t3 -= region[13] * a[3 + 13 * BLOCK];
t0 -= region[14] * a[0 + 14 * BLOCK];
t1 -= region[14] * a[1 + 14 * BLOCK];
t2 -= region[14] * a[2 + 14 * BLOCK];
t3 -= region[14] * a[3 + 14 * BLOCK];
t0 -= region[15] * a[0 + 15 * BLOCK];
t1 -= region[15] * a[1 + 15 * BLOCK];
t2 -= region[15] * a[2 + 15 * BLOCK];
t3 -= region[15] * a[3 + 15 * BLOCK];
#endif
region2[0] = t0;
region2[1] = t1;
region2[2] = t2;
region2[3] = t3;
region2+=4;
a+=4;
}
} else {
#endif
for (k = 0; k < n; ++k) {
longDouble t00 = region2[k];
for (j = 0; j < BLOCK; j ++) {
t00 -= region[j] * a[k + j * BLOCK];
}
region2[k] = t00;
}
#if BLOCK==16
}
#endif
}
// Backward part of solve 1
void
ClpCholeskyDense::solveB1(longDouble * a,int n,double * region)
{
int j, k;
longDouble t00;
for (j = n-1; j >=0; j --) {
t00 = region[j];
for (k = j+1; k < n; ++k) {
t00 -= region[k] * a[k + j * BLOCK];
}
//t00*=a[j + j * BLOCK];
region[j] = t00;
}
}
// Backward part of solve 2
void
ClpCholeskyDense::solveB2(longDouble * a,int n,double * region, double * region2)
{
int j, k;
#if BLOCK==16
if (n==BLOCK) {
for (j = 0; j < BLOCK; j +=4) {
longDouble t0 = region[0];
longDouble t1 = region[1];
longDouble t2 = region[2];
longDouble t3 = region[3];
t0 -= region2[0] * a[0 + 0*BLOCK];
t1 -= region2[0] * a[0 + 1*BLOCK];
t2 -= region2[0] * a[0 + 2*BLOCK];
t3 -= region2[0] * a[0 + 3*BLOCK];
t0 -= region2[1] * a[1 + 0*BLOCK];
t1 -= region2[1] * a[1 + 1*BLOCK];
t2 -= region2[1] * a[1 + 2*BLOCK];
t3 -= region2[1] * a[1 + 3*BLOCK];
t0 -= region2[2] * a[2 + 0*BLOCK];
t1 -= region2[2] * a[2 + 1*BLOCK];
t2 -= region2[2] * a[2 + 2*BLOCK];
t3 -= region2[2] * a[2 + 3*BLOCK];
t0 -= region2[3] * a[3 + 0*BLOCK];
t1 -= region2[3] * a[3 + 1*BLOCK];
t2 -= region2[3] * a[3 + 2*BLOCK];
t3 -= region2[3] * a[3 + 3*BLOCK];
t0 -= region2[4] * a[4 + 0*BLOCK];
t1 -= region2[4] * a[4 + 1*BLOCK];
t2 -= region2[4] * a[4 + 2*BLOCK];
t3 -= region2[4] * a[4 + 3*BLOCK];
t0 -= region2[5] * a[5 + 0*BLOCK];
t1 -= region2[5] * a[5 + 1*BLOCK];
t2 -= region2[5] * a[5 + 2*BLOCK];
t3 -= region2[5] * a[5 + 3*BLOCK];
t0 -= region2[6] * a[6 + 0*BLOCK];
t1 -= region2[6] * a[6 + 1*BLOCK];
t2 -= region2[6] * a[6 + 2*BLOCK];
t3 -= region2[6] * a[6 + 3*BLOCK];
t0 -= region2[7] * a[7 + 0*BLOCK];
t1 -= region2[7] * a[7 + 1*BLOCK];
t2 -= region2[7] * a[7 + 2*BLOCK];
t3 -= region2[7] * a[7 + 3*BLOCK];
#if BLOCK>8
t0 -= region2[8] * a[8 + 0*BLOCK];
t1 -= region2[8] * a[8 + 1*BLOCK];
t2 -= region2[8] * a[8 + 2*BLOCK];
t3 -= region2[8] * a[8 + 3*BLOCK];
t0 -= region2[9] * a[9 + 0*BLOCK];
t1 -= region2[9] * a[9 + 1*BLOCK];
t2 -= region2[9] * a[9 + 2*BLOCK];
t3 -= region2[9] * a[9 + 3*BLOCK];
t0 -= region2[10] * a[10 + 0*BLOCK];
t1 -= region2[10] * a[10 + 1*BLOCK];
t2 -= region2[10] * a[10 + 2*BLOCK];
t3 -= region2[10] * a[10 + 3*BLOCK];
t0 -= region2[11] * a[11 + 0*BLOCK];
t1 -= region2[11] * a[11 + 1*BLOCK];
t2 -= region2[11] * a[11 + 2*BLOCK];
t3 -= region2[11] * a[11 + 3*BLOCK];
t0 -= region2[12] * a[12 + 0*BLOCK];
t1 -= region2[12] * a[12 + 1*BLOCK];
t2 -= region2[12] * a[12 + 2*BLOCK];
t3 -= region2[12] * a[12 + 3*BLOCK];
t0 -= region2[13] * a[13 + 0*BLOCK];
t1 -= region2[13] * a[13 + 1*BLOCK];
t2 -= region2[13] * a[13 + 2*BLOCK];
t3 -= region2[13] * a[13 + 3*BLOCK];
t0 -= region2[14] * a[14 + 0*BLOCK];
t1 -= region2[14] * a[14 + 1*BLOCK];
t2 -= region2[14] * a[14 + 2*BLOCK];
t3 -= region2[14] * a[14 + 3*BLOCK];
t0 -= region2[15] * a[15 + 0*BLOCK];
t1 -= region2[15] * a[15 + 1*BLOCK];
t2 -= region2[15] * a[15 + 2*BLOCK];
t3 -= region2[15] * a[15 + 3*BLOCK];
#endif
region[0] = t0;
region[1] = t1;
region[2] = t2;
region[3] = t3;
a+=4*BLOCK;
region+=4;
}
} else {
#endif
for (j = 0; j < BLOCK; j ++) {
longDouble t00 = region[j];
for (k = 0; k < n; ++k) {
t00 -= region2[k] * a[k + j * BLOCK];
}
region[j] = t00;
}
#if BLOCK==16
}
#endif
}
/* Uses factorization to solve. */
void
ClpCholeskyDense::solveLong (longDouble * region)
{
int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
// later align on boundary
longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
int iBlock;
longDouble * aa = a;
int iColumn;
for (iBlock=0;iBlock<numberBlocks;iBlock++) {
int nChunk;
int jBlock;
int iDo = iBlock*BLOCK;
int base=iDo;
if (iDo+BLOCK>numberRows_) {
nChunk=numberRows_-iDo;
} else {
nChunk=BLOCK;
}
solveF1Long(aa,nChunk,region+iDo);
for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
base+=BLOCK;
aa+=BLOCKSQ;
if (base+BLOCK>numberRows_) {
nChunk=numberRows_-base;
} else {
nChunk=BLOCK;
}
solveF2Long(aa,nChunk,region+iDo,region+base);
}
aa+=BLOCKSQ;
}
// do diagonal outside
for (iColumn=0;iColumn<numberRows_;iColumn++)
region[iColumn] *= diagonal_[iColumn];
int offset=((numberBlocks*(numberBlocks+1))>>1);
aa = a+number_entries(offset-1);
int lBase=(numberBlocks-1)*BLOCK;
for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) {
int nChunk;
int jBlock;
int triBase=iBlock*BLOCK;
int iBase=lBase;
for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
if (iBase+BLOCK>numberRows_) {
nChunk=numberRows_-iBase;
} else {
nChunk=BLOCK;
}
solveB2Long(aa,nChunk,region+triBase,region+iBase);
iBase-=BLOCK;
aa-=BLOCKSQ;
}
if (triBase+BLOCK>numberRows_) {
nChunk=numberRows_-triBase;
} else {
nChunk=BLOCK;
}
solveB1Long(aa,nChunk,region+triBase);
aa-=BLOCKSQ;
}
}
// Forward part of solve 1
void
ClpCholeskyDense::solveF1Long(longDouble * a,int n,longDouble * region)
{
int j, k;
longDouble t00;
for (j = 0; j < n; j ++) {
t00 = region[j];
for (k = 0; k < j; ++k) {
t00 -= region[k] * a[j + k * BLOCK];
}
//t00*=a[j + j * BLOCK];
region[j] = t00;
}
}
// Forward part of solve 2
void
ClpCholeskyDense::solveF2Long(longDouble * a,int n,longDouble * region, longDouble * region2)
{
int j, k;
#if BLOCK==16
if (n==BLOCK) {
for (k = 0; k < BLOCK; k+=4) {
longDouble t0 = region2[0];
longDouble t1 = region2[1];
longDouble t2 = region2[2];
longDouble t3 = region2[3];
t0 -= region[0] * a[0 + 0 * BLOCK];
t1 -= region[0] * a[1 + 0 * BLOCK];
t2 -= region[0] * a[2 + 0 * BLOCK];
t3 -= region[0] * a[3 + 0 * BLOCK];
t0 -= region[1] * a[0 + 1 * BLOCK];
t1 -= region[1] * a[1 + 1 * BLOCK];
t2 -= region[1] * a[2 + 1 * BLOCK];
t3 -= region[1] * a[3 + 1 * BLOCK];
t0 -= region[2] * a[0 + 2 * BLOCK];
t1 -= region[2] * a[1 + 2 * BLOCK];
t2 -= region[2] * a[2 + 2 * BLOCK];
t3 -= region[2] * a[3 + 2 * BLOCK];
t0 -= region[3] * a[0 + 3 * BLOCK];
t1 -= region[3] * a[1 + 3 * BLOCK];
t2 -= region[3] * a[2 + 3 * BLOCK];
t3 -= region[3] * a[3 + 3 * BLOCK];
t0 -= region[4] * a[0 + 4 * BLOCK];
t1 -= region[4] * a[1 + 4 * BLOCK];
t2 -= region[4] * a[2 + 4 * BLOCK];
t3 -= region[4] * a[3 + 4 * BLOCK];
t0 -= region[5] * a[0 + 5 * BLOCK];
t1 -= region[5] * a[1 + 5 * BLOCK];
t2 -= region[5] * a[2 + 5 * BLOCK];
t3 -= region[5] * a[3 + 5 * BLOCK];
t0 -= region[6] * a[0 + 6 * BLOCK];
t1 -= region[6] * a[1 + 6 * BLOCK];
t2 -= region[6] * a[2 + 6 * BLOCK];
t3 -= region[6] * a[3 + 6 * BLOCK];
t0 -= region[7] * a[0 + 7 * BLOCK];
t1 -= region[7] * a[1 + 7 * BLOCK];
t2 -= region[7] * a[2 + 7 * BLOCK];
t3 -= region[7] * a[3 + 7 * BLOCK];
#if BLOCK>8
t0 -= region[8] * a[0 + 8 * BLOCK];
t1 -= region[8] * a[1 + 8 * BLOCK];
t2 -= region[8] * a[2 + 8 * BLOCK];
t3 -= region[8] * a[3 + 8 * BLOCK];
t0 -= region[9] * a[0 + 9 * BLOCK];
t1 -= region[9] * a[1 + 9 * BLOCK];
t2 -= region[9] * a[2 + 9 * BLOCK];
t3 -= region[9] * a[3 + 9 * BLOCK];
t0 -= region[10] * a[0 + 10 * BLOCK];
t1 -= region[10] * a[1 + 10 * BLOCK];
t2 -= region[10] * a[2 + 10 * BLOCK];
t3 -= region[10] * a[3 + 10 * BLOCK];
t0 -= region[11] * a[0 + 11 * BLOCK];
t1 -= region[11] * a[1 + 11 * BLOCK];
t2 -= region[11] * a[2 + 11 * BLOCK];
t3 -= region[11] * a[3 + 11 * BLOCK];
t0 -= region[12] * a[0 + 12 * BLOCK];
t1 -= region[12] * a[1 + 12 * BLOCK];
t2 -= region[12] * a[2 + 12 * BLOCK];
t3 -= region[12] * a[3 + 12 * BLOCK];
t0 -= region[13] * a[0 + 13 * BLOCK];
t1 -= region[13] * a[1 + 13 * BLOCK];
t2 -= region[13] * a[2 + 13 * BLOCK];
t3 -= region[13] * a[3 + 13 * BLOCK];
t0 -= region[14] * a[0 + 14 * BLOCK];
t1 -= region[14] * a[1 + 14 * BLOCK];
t2 -= region[14] * a[2 + 14 * BLOCK];
t3 -= region[14] * a[3 + 14 * BLOCK];
t0 -= region[15] * a[0 + 15 * BLOCK];
t1 -= region[15] * a[1 + 15 * BLOCK];
t2 -= region[15] * a[2 + 15 * BLOCK];
t3 -= region[15] * a[3 + 15 * BLOCK];
#endif
region2[0] = t0;
region2[1] = t1;
region2[2] = t2;
region2[3] = t3;
region2+=4;
a+=4;
}
} else {
#endif
for (k = 0; k < n; ++k) {
longDouble t00 = region2[k];
for (j = 0; j < BLOCK; j ++) {
t00 -= region[j] * a[k + j * BLOCK];
}
region2[k] = t00;
}
#if BLOCK==16
}
#endif
}
// Backward part of solve 1
void
ClpCholeskyDense::solveB1Long(longDouble * a,int n,longDouble * region)
{
int j, k;
longDouble t00;
for (j = n-1; j >=0; j --) {
t00 = region[j];
for (k = j+1; k < n; ++k) {
t00 -= region[k] * a[k + j * BLOCK];
}
//t00*=a[j + j * BLOCK];
region[j] = t00;
}
}
// Backward part of solve 2
void
ClpCholeskyDense::solveB2Long(longDouble * a,int n,longDouble * region, longDouble * region2)
{
int j, k;
#if BLOCK==16
if (n==BLOCK) {
for (j = 0; j < BLOCK; j +=4) {
longDouble t0 = region[0];
longDouble t1 = region[1];
longDouble t2 = region[2];
longDouble t3 = region[3];
t0 -= region2[0] * a[0 + 0*BLOCK];
t1 -= region2[0] * a[0 + 1*BLOCK];
t2 -= region2[0] * a[0 + 2*BLOCK];
t3 -= region2[0] * a[0 + 3*BLOCK];
t0 -= region2[1] * a[1 + 0*BLOCK];
t1 -= region2[1] * a[1 + 1*BLOCK];
t2 -= region2[1] * a[1 + 2*BLOCK];
t3 -= region2[1] * a[1 + 3*BLOCK];
t0 -= region2[2] * a[2 + 0*BLOCK];
t1 -= region2[2] * a[2 + 1*BLOCK];
t2 -= region2[2] * a[2 + 2*BLOCK];
t3 -= region2[2] * a[2 + 3*BLOCK];
t0 -= region2[3] * a[3 + 0*BLOCK];
t1 -= region2[3] * a[3 + 1*BLOCK];
t2 -= region2[3] * a[3 + 2*BLOCK];
t3 -= region2[3] * a[3 + 3*BLOCK];
t0 -= region2[4] * a[4 + 0*BLOCK];
t1 -= region2[4] * a[4 + 1*BLOCK];
t2 -= region2[4] * a[4 + 2*BLOCK];
t3 -= region2[4] * a[4 + 3*BLOCK];
t0 -= region2[5] * a[5 + 0*BLOCK];
t1 -= region2[5] * a[5 + 1*BLOCK];
t2 -= region2[5] * a[5 + 2*BLOCK];
t3 -= region2[5] * a[5 + 3*BLOCK];
t0 -= region2[6] * a[6 + 0*BLOCK];
t1 -= region2[6] * a[6 + 1*BLOCK];
t2 -= region2[6] * a[6 + 2*BLOCK];
t3 -= region2[6] * a[6 + 3*BLOCK];
t0 -= region2[7] * a[7 + 0*BLOCK];
t1 -= region2[7] * a[7 + 1*BLOCK];
t2 -= region2[7] * a[7 + 2*BLOCK];
t3 -= region2[7] * a[7 + 3*BLOCK];
#if BLOCK>8
t0 -= region2[8] * a[8 + 0*BLOCK];
t1 -= region2[8] * a[8 + 1*BLOCK];
t2 -= region2[8] * a[8 + 2*BLOCK];
t3 -= region2[8] * a[8 + 3*BLOCK];
t0 -= region2[9] * a[9 + 0*BLOCK];
t1 -= region2[9] * a[9 + 1*BLOCK];
t2 -= region2[9] * a[9 + 2*BLOCK];
t3 -= region2[9] * a[9 + 3*BLOCK];
t0 -= region2[10] * a[10 + 0*BLOCK];
t1 -= region2[10] * a[10 + 1*BLOCK];
t2 -= region2[10] * a[10 + 2*BLOCK];
t3 -= region2[10] * a[10 + 3*BLOCK];
t0 -= region2[11] * a[11 + 0*BLOCK];
t1 -= region2[11] * a[11 + 1*BLOCK];
t2 -= region2[11] * a[11 + 2*BLOCK];
t3 -= region2[11] * a[11 + 3*BLOCK];
t0 -= region2[12] * a[12 + 0*BLOCK];
t1 -= region2[12] * a[12 + 1*BLOCK];
t2 -= region2[12] * a[12 + 2*BLOCK];
t3 -= region2[12] * a[12 + 3*BLOCK];
t0 -= region2[13] * a[13 + 0*BLOCK];
t1 -= region2[13] * a[13 + 1*BLOCK];
t2 -= region2[13] * a[13 + 2*BLOCK];
t3 -= region2[13] * a[13 + 3*BLOCK];
t0 -= region2[14] * a[14 + 0*BLOCK];
t1 -= region2[14] * a[14 + 1*BLOCK];
t2 -= region2[14] * a[14 + 2*BLOCK];
t3 -= region2[14] * a[14 + 3*BLOCK];
t0 -= region2[15] * a[15 + 0*BLOCK];
t1 -= region2[15] * a[15 + 1*BLOCK];
t2 -= region2[15] * a[15 + 2*BLOCK];
t3 -= region2[15] * a[15 + 3*BLOCK];
#endif
region[0] = t0;
region[1] = t1;
region[2] = t2;
region[3] = t3;
a+=4*BLOCK;
region+=4;
}
} else {
#endif
for (j = 0; j < BLOCK; j ++) {
longDouble t00 = region[j];
for (k = 0; k < n; ++k) {
t00 -= region2[k] * a[k + j * BLOCK];
}
region[j] = t00;
}
#if BLOCK==16
}
#endif
}
/* Uses factorization to solve. */
void
ClpCholeskyDense::solveLongWork (longWork * region)
{
int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
// later align on boundary
longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
int iBlock;
longDouble * aa = a;
int iColumn;
for (iBlock=0;iBlock<numberBlocks;iBlock++) {
int nChunk;
int jBlock;
int iDo = iBlock*BLOCK;
int base=iDo;
if (iDo+BLOCK>numberRows_) {
nChunk=numberRows_-iDo;
} else {
nChunk=BLOCK;
}
solveF1LongWork(aa,nChunk,region+iDo);
for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
base+=BLOCK;
aa+=BLOCKSQ;
if (base+BLOCK>numberRows_) {
nChunk=numberRows_-base;
} else {
nChunk=BLOCK;
}
solveF2LongWork(aa,nChunk,region+iDo,region+base);
}
aa+=BLOCKSQ;
}
// do diagonal outside
for (iColumn=0;iColumn<numberRows_;iColumn++)
region[iColumn] *= diagonal_[iColumn];
int offset=((numberBlocks*(numberBlocks+1))>>1);
aa = a+number_entries(offset-1);
int lBase=(numberBlocks-1)*BLOCK;
for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) {
int nChunk;
int jBlock;
int triBase=iBlock*BLOCK;
int iBase=lBase;
for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
if (iBase+BLOCK>numberRows_) {
nChunk=numberRows_-iBase;
} else {
nChunk=BLOCK;
}
solveB2LongWork(aa,nChunk,region+triBase,region+iBase);
iBase-=BLOCK;
aa-=BLOCKSQ;
}
if (triBase+BLOCK>numberRows_) {
nChunk=numberRows_-triBase;
} else {
nChunk=BLOCK;
}
solveB1LongWork(aa,nChunk,region+triBase);
aa-=BLOCKSQ;
}
}
// Forward part of solve 1
void
ClpCholeskyDense::solveF1LongWork(longDouble * a,int n,longWork * region)
{
int j, k;
longWork t00;
for (j = 0; j < n; j ++) {
t00 = region[j];
for (k = 0; k < j; ++k) {
t00 -= region[k] * a[j + k * BLOCK];
}
//t00*=a[j + j * BLOCK];
region[j] = t00;
}
}
// Forward part of solve 2
void
ClpCholeskyDense::solveF2LongWork(longDouble * a,int n,longWork * region, longWork * region2)
{
int j, k;
#if BLOCK==16
if (n==BLOCK) {
for (k = 0; k < BLOCK; k+=4) {
longWork t0 = region2[0];
longWork t1 = region2[1];
longWork t2 = region2[2];
longWork t3 = region2[3];
t0 -= region[0] * a[0 + 0 * BLOCK];
t1 -= region[0] * a[1 + 0 * BLOCK];
t2 -= region[0] * a[2 + 0 * BLOCK];
t3 -= region[0] * a[3 + 0 * BLOCK];
t0 -= region[1] * a[0 + 1 * BLOCK];
t1 -= region[1] * a[1 + 1 * BLOCK];
t2 -= region[1] * a[2 + 1 * BLOCK];
t3 -= region[1] * a[3 + 1 * BLOCK];
t0 -= region[2] * a[0 + 2 * BLOCK];
t1 -= region[2] * a[1 + 2 * BLOCK];
t2 -= region[2] * a[2 + 2 * BLOCK];
t3 -= region[2] * a[3 + 2 * BLOCK];
t0 -= region[3] * a[0 + 3 * BLOCK];
t1 -= region[3] * a[1 + 3 * BLOCK];
t2 -= region[3] * a[2 + 3 * BLOCK];
t3 -= region[3] * a[3 + 3 * BLOCK];
t0 -= region[4] * a[0 + 4 * BLOCK];
t1 -= region[4] * a[1 + 4 * BLOCK];
t2 -= region[4] * a[2 + 4 * BLOCK];
t3 -= region[4] * a[3 + 4 * BLOCK];
t0 -= region[5] * a[0 + 5 * BLOCK];
t1 -= region[5] * a[1 + 5 * BLOCK];
t2 -= region[5] * a[2 + 5 * BLOCK];
t3 -= region[5] * a[3 + 5 * BLOCK];
t0 -= region[6] * a[0 + 6 * BLOCK];
t1 -= region[6] * a[1 + 6 * BLOCK];
t2 -= region[6] * a[2 + 6 * BLOCK];
t3 -= region[6] * a[3 + 6 * BLOCK];
t0 -= region[7] * a[0 + 7 * BLOCK];
t1 -= region[7] * a[1 + 7 * BLOCK];
t2 -= region[7] * a[2 + 7 * BLOCK];
t3 -= region[7] * a[3 + 7 * BLOCK];
#if BLOCK>8
t0 -= region[8] * a[0 + 8 * BLOCK];
t1 -= region[8] * a[1 + 8 * BLOCK];
t2 -= region[8] * a[2 + 8 * BLOCK];
t3 -= region[8] * a[3 + 8 * BLOCK];
t0 -= region[9] * a[0 + 9 * BLOCK];
t1 -= region[9] * a[1 + 9 * BLOCK];
t2 -= region[9] * a[2 + 9 * BLOCK];
t3 -= region[9] * a[3 + 9 * BLOCK];
t0 -= region[10] * a[0 + 10 * BLOCK];
t1 -= region[10] * a[1 + 10 * BLOCK];
t2 -= region[10] * a[2 + 10 * BLOCK];
t3 -= region[10] * a[3 + 10 * BLOCK];
t0 -= region[11] * a[0 + 11 * BLOCK];
t1 -= region[11] * a[1 + 11 * BLOCK];
t2 -= region[11] * a[2 + 11 * BLOCK];
t3 -= region[11] * a[3 + 11 * BLOCK];
t0 -= region[12] * a[0 + 12 * BLOCK];
t1 -= region[12] * a[1 + 12 * BLOCK];
t2 -= region[12] * a[2 + 12 * BLOCK];
t3 -= region[12] * a[3 + 12 * BLOCK];
t0 -= region[13] * a[0 + 13 * BLOCK];
t1 -= region[13] * a[1 + 13 * BLOCK];
t2 -= region[13] * a[2 + 13 * BLOCK];
t3 -= region[13] * a[3 + 13 * BLOCK];
t0 -= region[14] * a[0 + 14 * BLOCK];
t1 -= region[14] * a[1 + 14 * BLOCK];
t2 -= region[14] * a[2 + 14 * BLOCK];
t3 -= region[14] * a[3 + 14 * BLOCK];
t0 -= region[15] * a[0 + 15 * BLOCK];
t1 -= region[15] * a[1 + 15 * BLOCK];
t2 -= region[15] * a[2 + 15 * BLOCK];
t3 -= region[15] * a[3 + 15 * BLOCK];
#endif
region2[0] = t0;
region2[1] = t1;
region2[2] = t2;
region2[3] = t3;
region2+=4;
a+=4;
}
} else {
#endif
for (k = 0; k < n; ++k) {
longWork t00 = region2[k];
for (j = 0; j < BLOCK; j ++) {
t00 -= region[j] * a[k + j * BLOCK];
}
region2[k] = t00;
}
#if BLOCK==16
}
#endif
}
// Backward part of solve 1
void
ClpCholeskyDense::solveB1LongWork(longDouble * a,int n,longWork * region)
{
int j, k;
longWork t00;
for (j = n-1; j >=0; j --) {
t00 = region[j];
for (k = j+1; k < n; ++k) {
t00 -= region[k] * a[k + j * BLOCK];
}
//t00*=a[j + j * BLOCK];
region[j] = t00;
}
}
// Backward part of solve 2
void
ClpCholeskyDense::solveB2LongWork(longDouble * a,int n,longWork * region, longWork * region2)
{
int j, k;
#if BLOCK==16
if (n==BLOCK) {
for (j = 0; j < BLOCK; j +=4) {
longWork t0 = region[0];
longWork t1 = region[1];
longWork t2 = region[2];
longWork t3 = region[3];
t0 -= region2[0] * a[0 + 0*BLOCK];
t1 -= region2[0] * a[0 + 1*BLOCK];
t2 -= region2[0] * a[0 + 2*BLOCK];
t3 -= region2[0] * a[0 + 3*BLOCK];
t0 -= region2[1] * a[1 + 0*BLOCK];
t1 -= region2[1] * a[1 + 1*BLOCK];
t2 -= region2[1] * a[1 + 2*BLOCK];
t3 -= region2[1] * a[1 + 3*BLOCK];
t0 -= region2[2] * a[2 + 0*BLOCK];
t1 -= region2[2] * a[2 + 1*BLOCK];
t2 -= region2[2] * a[2 + 2*BLOCK];
t3 -= region2[2] * a[2 + 3*BLOCK];
t0 -= region2[3] * a[3 + 0*BLOCK];
t1 -= region2[3] * a[3 + 1*BLOCK];
t2 -= region2[3] * a[3 + 2*BLOCK];
t3 -= region2[3] * a[3 + 3*BLOCK];
t0 -= region2[4] * a[4 + 0*BLOCK];
t1 -= region2[4] * a[4 + 1*BLOCK];
t2 -= region2[4] * a[4 + 2*BLOCK];
t3 -= region2[4] * a[4 + 3*BLOCK];
t0 -= region2[5] * a[5 + 0*BLOCK];
t1 -= region2[5] * a[5 + 1*BLOCK];
t2 -= region2[5] * a[5 + 2*BLOCK];
t3 -= region2[5] * a[5 + 3*BLOCK];
t0 -= region2[6] * a[6 + 0*BLOCK];
t1 -= region2[6] * a[6 + 1*BLOCK];
t2 -= region2[6] * a[6 + 2*BLOCK];
t3 -= region2[6] * a[6 + 3*BLOCK];
t0 -= region2[7] * a[7 + 0*BLOCK];
t1 -= region2[7] * a[7 + 1*BLOCK];
t2 -= region2[7] * a[7 + 2*BLOCK];
t3 -= region2[7] * a[7 + 3*BLOCK];
#if BLOCK>8
t0 -= region2[8] * a[8 + 0*BLOCK];
t1 -= region2[8] * a[8 + 1*BLOCK];
t2 -= region2[8] * a[8 + 2*BLOCK];
t3 -= region2[8] * a[8 + 3*BLOCK];
t0 -= region2[9] * a[9 + 0*BLOCK];
t1 -= region2[9] * a[9 + 1*BLOCK];
t2 -= region2[9] * a[9 + 2*BLOCK];
t3 -= region2[9] * a[9 + 3*BLOCK];
t0 -= region2[10] * a[10 + 0*BLOCK];
t1 -= region2[10] * a[10 + 1*BLOCK];
t2 -= region2[10] * a[10 + 2*BLOCK];
t3 -= region2[10] * a[10 + 3*BLOCK];
t0 -= region2[11] * a[11 + 0*BLOCK];
t1 -= region2[11] * a[11 + 1*BLOCK];
t2 -= region2[11] * a[11 + 2*BLOCK];
t3 -= region2[11] * a[11 + 3*BLOCK];
t0 -= region2[12] * a[12 + 0*BLOCK];
t1 -= region2[12] * a[12 + 1*BLOCK];
t2 -= region2[12] * a[12 + 2*BLOCK];
t3 -= region2[12] * a[12 + 3*BLOCK];
t0 -= region2[13] * a[13 + 0*BLOCK];
t1 -= region2[13] * a[13 + 1*BLOCK];
t2 -= region2[13] * a[13 + 2*BLOCK];
t3 -= region2[13] * a[13 + 3*BLOCK];
t0 -= region2[14] * a[14 + 0*BLOCK];
t1 -= region2[14] * a[14 + 1*BLOCK];
t2 -= region2[14] * a[14 + 2*BLOCK];
t3 -= region2[14] * a[14 + 3*BLOCK];
t0 -= region2[15] * a[15 + 0*BLOCK];
t1 -= region2[15] * a[15 + 1*BLOCK];
t2 -= region2[15] * a[15 + 2*BLOCK];
t3 -= region2[15] * a[15 + 3*BLOCK];
#endif
region[0] = t0;
region[1] = t1;
region[2] = t2;
region[3] = t3;
a+=4*BLOCK;
region+=4;
}
} else {
#endif
for (j = 0; j < BLOCK; j ++) {
longWork t00 = region[j];
for (k = 0; k < n; ++k) {
t00 -= region2[k] * a[k + j * BLOCK];
}
region[j] = t00;
}
#if BLOCK==16
}
#endif
}
syntax highlighted by Code2HTML, v. 0.9.1