// Copyright (C) 2003, International Business Machines
// Corporation and others.  All Rights Reserved.



#include "CoinPragma.hpp"
#include "CoinHelperFunctions.hpp"

#include "ClpInterior.hpp"
#include "ClpCholeskyDense.hpp"
#include "ClpMessage.hpp"
#include "ClpQuadraticObjective.hpp"

//#############################################################################
// Constructors / Destructor / Assignment
//#############################################################################

//-------------------------------------------------------------------
// Default Constructor 
//-------------------------------------------------------------------
ClpCholeskyDense::ClpCholeskyDense () 
  : ClpCholeskyBase(),
    borrowSpace_(false)
{
  type_=11;;
}

//-------------------------------------------------------------------
// Copy constructor 
//-------------------------------------------------------------------
ClpCholeskyDense::ClpCholeskyDense (const ClpCholeskyDense & rhs) 
  : ClpCholeskyBase(rhs),
    borrowSpace_(rhs.borrowSpace_)
{
  assert(!rhs.borrowSpace_||!rhs.sizeFactor_); // can't do if borrowing space
}


//-------------------------------------------------------------------
// Destructor 
//-------------------------------------------------------------------
ClpCholeskyDense::~ClpCholeskyDense ()
{
  if (borrowSpace_) {
    // set NULL
    sparseFactor_=NULL;
    workDouble_=NULL;
    diagonal_=NULL;
  }
}

//----------------------------------------------------------------
// Assignment operator 
//-------------------------------------------------------------------
ClpCholeskyDense &
ClpCholeskyDense::operator=(const ClpCholeskyDense& rhs)
{
  if (this != &rhs) {
    assert(!rhs.borrowSpace_||!rhs.sizeFactor_); // can't do if borrowing space
    ClpCholeskyBase::operator=(rhs);
    borrowSpace_=rhs.borrowSpace_;
  }
  return *this;
}
//-------------------------------------------------------------------
// Clone
//-------------------------------------------------------------------
ClpCholeskyBase * ClpCholeskyDense::clone() const
{
  return new ClpCholeskyDense(*this);
}
// If not power of 2 then need to redo a bit
#define BLOCK 16
#define BLOCKSHIFT 4

#define BLOCKSQ ( BLOCK*BLOCK )
#define BLOCKSQSHIFT ( BLOCKSHIFT+BLOCKSHIFT )
#define number_blocks(x) (((x)+BLOCK-1)>>BLOCKSHIFT)
#define number_rows(x) ((x)<<BLOCKSHIFT)
#define number_entries(x) ((x)<<BLOCKSQSHIFT)
/* Gets space */
int 
ClpCholeskyDense::reserveSpace(const ClpCholeskyBase * factor, int numberRows) 
{
  numberRows_ = numberRows;
  int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
  // allow one stripe extra
  numberBlocks = numberBlocks + ((numberBlocks*(numberBlocks+1))/2);
  sizeFactor_=numberBlocks*BLOCKSQ;
  //#define CHOL_COMPARE
#ifdef CHOL_COMPARE  
  sizeFactor_ += 95000;
#endif
  if (!factor) {
    sparseFactor_ = new longDouble [sizeFactor_];
    rowsDropped_ = new char [numberRows_];
    memset(rowsDropped_,0,numberRows_);
    workDouble_ = new longDouble[numberRows_];
    diagonal_ = new longDouble[numberRows_];
  } else {
    borrowSpace_=true;
    int numberFull = factor->numberRows();
    sparseFactor_ = factor->sparseFactor()+(factor->size()-sizeFactor_);
    workDouble_ = factor->workDouble() + (numberFull-numberRows_);
    diagonal_ = factor->diagonal() + (numberFull-numberRows_);
  }
  numberRowsDropped_=0;
  return 0;
}
/* Returns space needed */
CoinBigIndex 
ClpCholeskyDense::space( int numberRows) const
{
 int numberBlocks = (numberRows+BLOCK-1)>>BLOCKSHIFT;
  // allow one stripe extra
  numberBlocks = numberBlocks + ((numberBlocks*(numberBlocks+1))/2);
  CoinBigIndex sizeFactor=numberBlocks*BLOCKSQ;
#ifdef CHOL_COMPARE  
  sizeFactor += 95000;
#endif
  return sizeFactor;
 }
/* Orders rows and saves pointer to matrix.and model */
int 
ClpCholeskyDense::order(ClpInterior * model) 
{
  model_=model;
  int numberRows;
  int numberRowsModel = model_->numberRows();
  int numberColumns = model_->numberColumns();
  if (!doKKT_) {
    numberRows = numberRowsModel;
  } else {
    numberRows = 2*numberRowsModel+numberColumns;
  }
  reserveSpace(NULL,numberRows);
  rowCopy_ = model->clpMatrix()->reverseOrderedCopy();
  return 0;
}
/* Does Symbolic factorization given permutation.
   This is called immediately after order.  If user provides this then
   user must provide factorize and solve.  Otherwise the default factorization is used
   returns non-zero if not enough memory */
int 
ClpCholeskyDense::symbolic()
{
  return 0;
}
/* Factorize - filling in rowsDropped and returning number dropped */
int 
ClpCholeskyDense::factorize(const double * diagonal, int * rowsDropped) 
{
  assert (!borrowSpace_);
  const CoinBigIndex * columnStart = model_->clpMatrix()->getVectorStarts();
  const int * columnLength = model_->clpMatrix()->getVectorLengths();
  const int * row = model_->clpMatrix()->getIndices();
  const double * element = model_->clpMatrix()->getElements();
  const CoinBigIndex * rowStart = rowCopy_->getVectorStarts();
  const int * rowLength = rowCopy_->getVectorLengths();
  const int * column = rowCopy_->getIndices();
  const double * elementByRow = rowCopy_->getElements();
  int numberColumns=model_->clpMatrix()->getNumCols();
  CoinZeroN(sparseFactor_,sizeFactor_);
  //perturbation
  longDouble perturbation=model_->diagonalPerturbation()*model_->diagonalNorm();
  perturbation=perturbation*perturbation;
  if (perturbation>1.0) {
#ifdef COIN_DEVELOP
    //if (model_->model()->logLevel()&4) 
      std::cout <<"large perturbation "<<perturbation<<std::endl;
#endif
    perturbation=sqrt(perturbation);;
    perturbation=1.0;
  }
  int iRow;
  int newDropped=0;
  double largest=1.0;
  double smallest=COIN_DBL_MAX;
  longDouble delta2 = model_->delta(); // add delta*delta to diagonal
  delta2 *= delta2;
  if (!doKKT_) {
    longDouble * work = sparseFactor_;
    work--; // skip diagonal
    int addOffset=numberRows_-1;
    const double * diagonalSlack = diagonal + numberColumns;
    // largest in initial matrix
    double largest2=1.0e-20;
    for (iRow=0;iRow<numberRows_;iRow++) {
      if (!rowsDropped_[iRow]) {
	CoinBigIndex startRow=rowStart[iRow];
	CoinBigIndex endRow=rowStart[iRow]+rowLength[iRow];
	longDouble diagonalValue = diagonalSlack[iRow]+delta2;
	for (CoinBigIndex k=startRow;k<endRow;k++) {
	  int iColumn=column[k];
	  CoinBigIndex start=columnStart[iColumn];
	  CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn];
	  longDouble multiplier = diagonal[iColumn]*elementByRow[k];
	  for (CoinBigIndex j=start;j<end;j++) {
	    int jRow=row[j];
	    if (!rowsDropped_[jRow]) {
	      if (jRow>iRow) {
		longDouble value=element[j]*multiplier;
		work[jRow] += value;
	      } else if (jRow==iRow) {
		longDouble value=element[j]*multiplier;
		diagonalValue += value;
	      }
	    }
	  }
	} 
	for (int j=iRow+1;j<numberRows_;j++) 
	  largest2 = CoinMax(largest2,fabs(work[j]));
	diagonal_[iRow]=diagonalValue;
	largest2 = CoinMax(largest2,fabs(diagonalValue));
      } else {
	// dropped
	diagonal_[iRow]=1.0;
      }
      addOffset--;
      work += addOffset;
    }
    //check sizes
    largest2*=1.0e-20;
    largest = CoinMin(largest2,1.0e-11);
    int numberDroppedBefore=0;
    for (iRow=0;iRow<numberRows_;iRow++) {
      int dropped=rowsDropped_[iRow];
      // Move to int array
      rowsDropped[iRow]=dropped;
      if (!dropped) {
	longDouble diagonal = diagonal_[iRow];
	if (diagonal>largest2) {
	  diagonal_[iRow]=diagonal+perturbation;
	} else {
	  diagonal_[iRow]=diagonal+perturbation;
	  rowsDropped[iRow]=2;
	  numberDroppedBefore++;
	} 
      }
    }
    doubleParameters_[10]=CoinMax(1.0e-20,largest);
    integerParameters_[20]=0;
    doubleParameters_[3]=0.0;
    doubleParameters_[4]=COIN_DBL_MAX;
    integerParameters_[34]=0; // say all must be positive
#ifdef CHOL_COMPARE  
    if (numberRows_<200)
      factorizePart3(rowsDropped);
#endif
    factorizePart2(rowsDropped);
    newDropped=integerParameters_[20]+numberDroppedBefore;
    largest=doubleParameters_[3];
    smallest=doubleParameters_[4];
    if (model_->messageHandler()->logLevel()>1) 
      std::cout<<"Cholesky - largest "<<largest<<" smallest "<<smallest<<std::endl;
    choleskyCondition_=largest/smallest;
    //drop fresh makes some formADAT easier
    if (newDropped||numberRowsDropped_) {
      newDropped=0;
      for (int i=0;i<numberRows_;i++) {
	char dropped = rowsDropped[i];
	rowsDropped_[i]=dropped;
	if (dropped==2) {
	  //dropped this time
	  rowsDropped[newDropped++]=i;
	  rowsDropped_[i]=0;
	} 
      } 
      numberRowsDropped_=newDropped;
      newDropped=-(2+newDropped);
    } 
  } else {
    // KKT
    CoinPackedMatrix * quadratic = NULL;
    ClpQuadraticObjective * quadraticObj = 
      (dynamic_cast< ClpQuadraticObjective*>(model_->objectiveAsObject()));
    if (quadraticObj) 
      quadratic = quadraticObj->quadraticObjective();
    // matrix
    int numberRowsModel = model_->numberRows();
    int numberColumns = model_->numberColumns();
    int numberTotal = numberColumns + numberRowsModel;
    longDouble * work = sparseFactor_;
    work--; // skip diagonal
    int addOffset=numberRows_-1;
    int iColumn;
    if (!quadratic) {
      for (iColumn=0;iColumn<numberColumns;iColumn++) {
	longDouble value = diagonal[iColumn];
	if (fabs(value)>1.0e-100) {
	  value = 1.0/value;
	  largest = CoinMax(largest,fabs(value));
	  diagonal_[iColumn] = -value;
	  CoinBigIndex start=columnStart[iColumn];
	  CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn];
	  for (CoinBigIndex j=start;j<end;j++) {
	    //choleskyRow_[numberElements]=row[j]+numberTotal;
	    //sparseFactor_[numberElements++]=element[j];
	    work[row[j]+numberTotal]=element[j];
	    largest = CoinMax(largest,fabs(element[j]));
	  }
	} else {
	  diagonal_[iColumn] = -value;
	}
	addOffset--;
	work += addOffset;
      }
    } else {
      // Quadratic
      const int * columnQuadratic = quadratic->getIndices();
      const CoinBigIndex * columnQuadraticStart = quadratic->getVectorStarts();
      const int * columnQuadraticLength = quadratic->getVectorLengths();
      const double * quadraticElement = quadratic->getElements();
      for (iColumn=0;iColumn<numberColumns;iColumn++) {
	longDouble value = diagonal[iColumn];
	CoinBigIndex j;
	if (fabs(value)>1.0e-100) {
	  value = 1.0/value;
	  for (j=columnQuadraticStart[iColumn];
	       j<columnQuadraticStart[iColumn]+columnQuadraticLength[iColumn];j++) {
	    int jColumn = columnQuadratic[j];
	    if (jColumn>iColumn) {
	      work[jColumn]=-quadraticElement[j];
	    } else if (iColumn==jColumn) {
	      value += quadraticElement[j];
	    }
	  }
	  largest = CoinMax(largest,fabs(value));
	  diagonal_[iColumn] = -value;
	  CoinBigIndex start=columnStart[iColumn];
	  CoinBigIndex end=columnStart[iColumn]+columnLength[iColumn];
	  for (j=start;j<end;j++) {
	    work[row[j]+numberTotal]=element[j];
	    largest = CoinMax(largest,fabs(element[j]));
	  }
	} else {
	  value = 1.0e100;
	  diagonal_[iColumn] = -value;
	}
	addOffset--;
	work += addOffset;
      }
    }
    // slacks
    for (iColumn=numberColumns;iColumn<numberTotal;iColumn++) {
      longDouble value = diagonal[iColumn];
      if (fabs(value)>1.0e-100) {
	value = 1.0/value;
	largest = CoinMax(largest,fabs(value));
      } else {
	value = 1.0e100;
      }
      diagonal_[iColumn] = -value;
      work[iColumn-numberColumns+numberTotal]=-1.0;
      addOffset--;
      work += addOffset;
    }
    // Finish diagonal
    for (iRow=0;iRow<numberRowsModel;iRow++) {
      diagonal_[iRow+numberTotal]=delta2;
    }
    //check sizes
    largest*=1.0e-20;
    largest = CoinMin(largest,1.0e-11);
    doubleParameters_[10]=CoinMax(1.0e-20,largest);
    integerParameters_[20]=0;
    doubleParameters_[3]=0.0;
    doubleParameters_[4]=COIN_DBL_MAX;
    // Set up LDL cutoff
    integerParameters_[34]=numberTotal;
    // KKT
    int * rowsDropped2 = new int[numberRows_];
    CoinZeroN(rowsDropped2,numberRows_);
#ifdef CHOL_COMPARE  
    if (numberRows_<200)
      factorizePart3(rowsDropped2);
#endif
    factorizePart2(rowsDropped2);
    newDropped=integerParameters_[20];
    largest=doubleParameters_[3];
    smallest=doubleParameters_[4];
    if (model_->messageHandler()->logLevel()>1) 
      std::cout<<"Cholesky - largest "<<largest<<" smallest "<<smallest<<std::endl;
    choleskyCondition_=largest/smallest;
    // Should save adjustments in ..R_
    int n1=0,n2=0;
    double * primalR = model_->primalR();
    double * dualR = model_->dualR();
    for (iRow=0;iRow<numberTotal;iRow++) { 
      if (rowsDropped2[iRow]) {
	n1++;
	//printf("row region1 %d dropped\n",iRow);
	//rowsDropped_[iRow]=1;
	rowsDropped_[iRow]=0;
	primalR[iRow]=doubleParameters_[20];
      } else {
	rowsDropped_[iRow]=0;
	primalR[iRow]=0.0;
      }
    }
    for (;iRow<numberRows_;iRow++) {
      if (rowsDropped2[iRow]) {
	n2++;
	//printf("row region2 %d dropped\n",iRow);
	//rowsDropped_[iRow]=1;
	rowsDropped_[iRow]=0;
	dualR[iRow-numberTotal]=doubleParameters_[34];
      } else {
	rowsDropped_[iRow]=0;
	dualR[iRow-numberTotal]=0.0;
      }
    }
  }
  return 0;
}
/* Factorize - filling in rowsDropped and returning number dropped */
void 
ClpCholeskyDense::factorizePart3( int * rowsDropped) 
{
  int iColumn;
  longDouble * xx = sparseFactor_;
  longDouble * yy = diagonal_;
  diagonal_ = sparseFactor_ + 40000;
  sparseFactor_=diagonal_ + numberRows_;
  //memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));
  memcpy(sparseFactor_,xx,40000*sizeof(double));
  memcpy(diagonal_,yy,numberRows_*sizeof(double));
  int numberDropped=0;
  double largest=0.0;
  double smallest=COIN_DBL_MAX;
  double dropValue = doubleParameters_[10];
  int firstPositive=integerParameters_[34];
  longDouble * work = sparseFactor_;
  // Allow for triangular
  int addOffset=numberRows_-1;
  work--;
  for (iColumn=0;iColumn<numberRows_;iColumn++) {
    int iRow;
    int addOffsetNow = numberRows_-1;;
    longDouble * workNow=sparseFactor_-1+iColumn;
    double diagonalValue = diagonal_[iColumn];
    for (iRow=0;iRow<iColumn;iRow++) {
      double aj = *workNow;
      addOffsetNow--;
      workNow += addOffsetNow;
      double multiplier = workDouble_[iRow];
      diagonalValue -= aj*aj*multiplier;
    }
    bool dropColumn=false;
    if (iColumn<firstPositive) {
      // must be negative
      if (diagonalValue<=-dropValue) {
	smallest = CoinMin(smallest,-diagonalValue);
	largest = CoinMax(largest,-diagonalValue);
	workDouble_[iColumn]=diagonalValue;
	diagonalValue = 1.0/diagonalValue;
      } else {
	dropColumn=true;
	workDouble_[iColumn]=-1.0e100;
	diagonalValue=0.0;
	integerParameters_[20]++;
      }
    } else {
      // must be positive
      if (diagonalValue>=dropValue) {
	smallest = CoinMin(smallest,diagonalValue);
	largest = CoinMax(largest,diagonalValue);
	workDouble_[iColumn]=diagonalValue;
	diagonalValue = 1.0/diagonalValue;
      } else {
	dropColumn=true;
	workDouble_[iColumn]=1.0e100;
	diagonalValue=0.0;
	integerParameters_[20]++;
      }
    }
    if (!dropColumn) {
      diagonal_[iColumn]=diagonalValue;
      for (iRow=iColumn+1;iRow<numberRows_;iRow++) {
	double value = work[iRow];
	workNow = sparseFactor_-1; 
	int addOffsetNow = numberRows_-1;;
	for (int jColumn=0;jColumn<iColumn;jColumn++) {
	  double aj = workNow[iColumn];
	  double multiplier = workDouble_[jColumn];
	  double ai = workNow[iRow];
	  addOffsetNow--;
	  workNow += addOffsetNow;
	  value -= aj*ai*multiplier;
	}
        work[iRow] = value*diagonalValue;
      }
    } else {
      // drop column
      rowsDropped[iColumn]=2;
      numberDropped++;
      diagonal_[iColumn]=0.0;
      for (iRow=iColumn+1;iRow<numberRows_;iRow++) {
        work[iRow] = 0.0;
      }
    }
    addOffset--;
    work += addOffset;
  }
  doubleParameters_[3]=largest;
  doubleParameters_[4]=smallest;
  integerParameters_[20]=numberDropped;
  sparseFactor_=xx;
  diagonal_=yy;
}
//#define POS_DEBUG
#ifdef POS_DEBUG
static int counter=0;
int ClpCholeskyDense::bNumber(const longDouble * array, int &iRow, int &iCol)
{
  int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
  longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
  int k = array-a;
  assert ((k%BLOCKSQ)==0);
  iCol=0;
  int kk=k>>BLOCKSQSHIFT;
  //printf("%d %d %d %d\n",k,kk,BLOCKSQ,BLOCKSQSHIFT);
  k=kk;
  while (k>=numberBlocks) {
    iCol++;
    k -= numberBlocks;
    numberBlocks--;
  }
  iRow = iCol+k;
  counter++;
  if(counter>100000)
    exit(77);
  return kk;
 }
#endif
/* Factorize - filling in rowsDropped and returning number dropped */
void 
ClpCholeskyDense::factorizePart2( int * rowsDropped) 
{
  int iColumn;
  //longDouble * xx = sparseFactor_;
  //longDouble * yy = diagonal_;
  //diagonal_ = sparseFactor_ + 40000;
  //sparseFactor_=diagonal_ + numberRows_;
  //memcpy(sparseFactor_,xx,sizeFactor_*sizeof(double));
  //memcpy(sparseFactor_,xx,40000*sizeof(double));
  //memcpy(diagonal_,yy,numberRows_*sizeof(double));
  int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
  // later align on boundary
  longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
  int n=numberRows_;
  int nRound = numberRows_&(~(BLOCK-1));
  // adjust if exact
  if (nRound==n)
    nRound -= BLOCK;
  int sizeLastBlock=n-nRound;
  int get = n*(n-1)/2; // ? as no diagonal
  int block = numberBlocks*(numberBlocks+1)/2;
  int ifOdd;
  int rowLast;
  if (sizeLastBlock!=BLOCK) {
    longDouble * aa = &a[(block-1)*BLOCKSQ];
    rowLast=nRound-1;
    ifOdd=1;
    int put = BLOCKSQ;
    // do last separately
    put -= (BLOCK-sizeLastBlock)*(BLOCK+1);
    for (iColumn=numberRows_-1;iColumn>=nRound;iColumn--) {
      int put2 = put;
      put -= BLOCK;
      for (int iRow=numberRows_-1;iRow>iColumn;iRow--) {
	aa[--put2] = sparseFactor_[--get];
	assert (aa+put2>=sparseFactor_+get);
      }
      // save diagonal as well
      aa[--put2]=diagonal_[iColumn];
    }
    n=nRound;
    block--;
  } else {
    // exact fit
    rowLast=numberRows_-1;
    ifOdd=0;
  }
  // Now main loop
  int nBlock=0;
  for (;n>0;n-=BLOCK) {
    longDouble * aa = &a[(block-1)*BLOCKSQ];
    longDouble * aaLast=NULL;
    int put = BLOCKSQ;
    int putLast=0;
    // see if we have small block
    if (ifOdd) {
      aaLast = &a[(block-1)*BLOCKSQ];
      aa = aaLast-BLOCKSQ;
      putLast = BLOCKSQ-BLOCK+sizeLastBlock;
    }
    for (iColumn=n-1;iColumn>=n-BLOCK;iColumn--) {
      if (aaLast) {
	// last bit
	for (int iRow=numberRows_-1;iRow>rowLast;iRow--) {
	  aaLast[--putLast] = sparseFactor_[--get];
	  assert (aaLast+putLast>=sparseFactor_+get);
	}
	putLast -= BLOCK-sizeLastBlock;
      }
      longDouble * aPut = aa;
      int j=rowLast;
      for (int jBlock=0;jBlock<=nBlock;jBlock++) {
	int put2 = put;
	int last = CoinMax(j-BLOCK,iColumn);
	for (int iRow=j;iRow>last;iRow--) {
	  aPut[--put2] = sparseFactor_[--get];
	  assert (aPut+put2>=sparseFactor_+get);
	}
	if (j-BLOCK<iColumn) {
	  // save diagonal as well
	  aPut[--put2]=diagonal_[iColumn];
	}
	j -= BLOCK;
	aPut -=BLOCKSQ;
      }
      put -= BLOCK;
    }
    nBlock++;
    block -= nBlock+ifOdd;
  }
  factor(a,numberRows_,numberBlocks,
	 diagonal_,workDouble_,rowsDropped);
  //sparseFactor_=xx;
  //diagonal_=yy;
}
// Non leaf recursive factor
void 
ClpCholeskyDense::factor(longDouble * a, int n, int numberBlocks,
	    longDouble * diagonal, longDouble * work, int * rowsDropped)
{
  if (n<=BLOCK) {
    factorLeaf(a,n,diagonal,work,rowsDropped);
  } else {
    int nb=number_blocks((n+1)>>1);
    int nThis=number_rows(nb);
    longDouble * aother;
    int nLeft=n-nThis;
    int nintri=(nb*(nb+1))>>1;
    int nbelow=(numberBlocks-nb)*nb;
    factor(a,nThis,numberBlocks,diagonal,work,rowsDropped);
    triRec(a,nThis,a+number_entries(nb),diagonal,work,nLeft,nb,0,numberBlocks);
    aother=a+number_entries(nintri+nbelow);
    recTri(a+number_entries(nb),nLeft,nThis,nb,0,aother,diagonal,work,numberBlocks);
    factor(aother,nLeft,
        numberBlocks-nb,diagonal+nThis,work+nThis,rowsDropped);
  }
}
// Non leaf recursive triangle rectangle update
void 
ClpCholeskyDense::triRec(longDouble * aTri, int nThis, longDouble * aUnder,
			 longDouble * diagonal, longDouble * work,
			 int nLeft, int iBlock, int jBlock,
			 int numberBlocks)
{
  if (nThis<=BLOCK&&nLeft<=BLOCK) {
    triRecLeaf(aTri,aUnder,diagonal,work,nLeft);
  } else if (nThis<nLeft) {
    int nb=number_blocks((nLeft+1)>>1);
    int nLeft2=number_rows(nb);
    triRec(aTri,nThis,aUnder,diagonal,work,nLeft2,iBlock,jBlock,numberBlocks);
    triRec(aTri,nThis,aUnder+number_entries(nb),diagonal,work,nLeft-nLeft2,
          iBlock+nb,jBlock,numberBlocks);
  } else {
    int nb=number_blocks((nThis+1)>>1);
    int nThis2=number_rows(nb);
    longDouble * aother;
    int kBlock=jBlock+nb;
    int i;
    int nintri=(nb*(nb+1))>>1;
    int nbelow=(numberBlocks-nb)*nb;
    triRec(aTri,nThis2,aUnder,diagonal,work,nLeft,iBlock,jBlock,numberBlocks);
    /* and rectangular update */
    i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)-
      (numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1;
    aother=aUnder+number_entries(i);
    recRec(aTri+number_entries(nb),nThis-nThis2,nLeft,nThis2,aUnder,aother,
          diagonal,work,iBlock,kBlock,jBlock,numberBlocks);
    triRec(aTri+number_entries(nintri+nbelow),nThis-nThis2,aother,diagonal+nThis2,
	   work+nThis2,nLeft,
      iBlock-nb,kBlock-nb,numberBlocks-nb);
  }
}
// Non leaf recursive rectangle triangle update
void 
ClpCholeskyDense::recTri(longDouble * aUnder, int nTri, int nDo,
			 int iBlock, int jBlock,longDouble * aTri,
			 longDouble * diagonal, longDouble * work,
			 int numberBlocks)
{
  if (nTri<=BLOCK&&nDo<=BLOCK) {
    recTriLeaf(aUnder,aTri,diagonal,work,nTri);
  } else if (nTri<nDo) {
    int nb=number_blocks((nDo+1)>>1);
    int nDo2=number_rows(nb);
    longDouble * aother;
    int i;
    recTri(aUnder,nTri,nDo2,iBlock,jBlock,aTri,diagonal,work,numberBlocks);
    i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)-
      (numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1;
    aother=aUnder+number_entries(i);
    recTri(aother,nTri,nDo-nDo2,iBlock-nb,jBlock,aTri,diagonal+nDo2,
	   work+nDo2,numberBlocks-nb);
  } else {
    int nb=number_blocks((nTri+1)>>1);
    int nTri2=number_rows(nb);
    longDouble * aother;
    int i;
    recTri(aUnder,nTri2,nDo,iBlock,jBlock,aTri,diagonal,work,numberBlocks);
    /* and rectangular update */
    i=((numberBlocks-iBlock)*(numberBlocks-iBlock+1)-
      (numberBlocks-iBlock-nb)*(numberBlocks-iBlock-nb+1))>>1;
    aother=aTri+number_entries(nb);
    recRec(aUnder,nTri2,nTri-nTri2,nDo,aUnder+number_entries(nb),aother,
	   diagonal,work,iBlock+nb,iBlock,jBlock,numberBlocks);
    recTri(aUnder+number_entries(nb),nTri-nTri2,nDo,iBlock+nb,jBlock,
         aTri+number_entries(i),diagonal,work,numberBlocks);
  }
}
#ifdef CHOL_USING_BLAS
// using simple blas interface
extern "C" 
{
  /** BLAS Fortran subroutine DGEMM. */
  void F77_FUNC(dgemm,DGEMM)(char* transa, char* transb,
                             ipfint *m, ipfint *n, ipfint *k,
                             const double *alpha, const double *a, ipfint *lda,
                             const double *b, ipfint *ldb, const double *beta,
                             double *c, ipfint *ldc,
                             int transa_len, int transb_len);
}
#endif
/* Non leaf recursive rectangle rectangle update,
   nUnder is number of rows in iBlock,
   nUnderK is number of rows in kBlock
*/
void 
ClpCholeskyDense::recRec(longDouble * above, int nUnder, int nUnderK,
			 int nDo, longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work,
	    int kBlock,int iBlock, int jBlock,
	    int numberBlocks)
{
#ifdef CHOL_USING_BLAS
    recRecLeaf(above , aUnder ,  aOther, diagonal,work, nUnderK);
void 
ClpCholeskyDense::recRecLeaf(longDouble * above, 
			     longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work,
			     int nUnder)
  aa = aOther-BLOCK;
  for (j = 0; j < BLOCK; j ++) {
    aa+=BLOCK;
    for (i=0;i< nUnderK;i++) {
      longDouble t00=aa[i+0*BLOCK];
      for (k=0;k<BLOCK;k++) {
	longDouble a00=aUnder[i+k*BLOCK]*work[k];
	t00 -= a00 * above[j + k * BLOCK];
      }
      aa[i]=t00;
    }
  return;
#endif
  if (nDo<=BLOCK&&nUnder<=BLOCK&&nUnderK<=BLOCK) {
    assert (nDo==BLOCK&&nUnder==BLOCK);
    recRecLeaf(above , aUnder ,  aOther, diagonal,work, nUnderK);
  } else if (nDo<=nUnderK&&nUnder<=nUnderK) {
    int nb=number_blocks((nUnderK+1)>>1);
    int nUnder2=number_rows(nb);
    recRec(above,nUnder,nUnder2,nDo,aUnder,aOther,diagonal,work,
               kBlock,iBlock,jBlock,numberBlocks);
    recRec(above,nUnder,nUnderK-nUnder2,nDo,aUnder+number_entries(nb),
        aOther+number_entries(nb),diagonal,work,kBlock+nb,iBlock,jBlock,numberBlocks);
  } else if (nUnderK<=nDo&&nUnder<=nDo) {
    int nb=number_blocks((nDo+1)>>1);
    int nDo2=number_rows(nb);
    int i;
    recRec(above,nUnder,nUnderK,nDo2,aUnder,aOther,diagonal,work,
         kBlock,iBlock,jBlock,numberBlocks);
    i=((numberBlocks-jBlock)*(numberBlocks-jBlock-1)-
      (numberBlocks-jBlock-nb)*(numberBlocks-jBlock-nb-1))>>1;
    recRec(above+number_entries(i),nUnder,nUnderK,nDo-nDo2,
	   aUnder+number_entries(i),
	   aOther,diagonal+nDo2,work+nDo2,
	   kBlock-nb,iBlock-nb,jBlock,numberBlocks-nb);
  } else {
    int nb=number_blocks((nUnder+1)>>1);
    int nUnder2=number_rows(nb);
    int i;
    recRec(above,nUnder2,nUnderK,nDo,aUnder,aOther,diagonal,work,
               kBlock,iBlock,jBlock,numberBlocks);
    i=((numberBlocks-iBlock)*(numberBlocks-iBlock-1)-
      (numberBlocks-iBlock-nb)*(numberBlocks-iBlock-nb-1))>>1;
    recRec(above+number_entries(nb),nUnder-nUnder2,nUnderK,nDo,aUnder,
        aOther+number_entries(i),diagonal,work,kBlock,iBlock+nb,jBlock,numberBlocks);
  }
}
// Leaf recursive factor
void 
ClpCholeskyDense::factorLeaf(longDouble * a, int n, 
		longDouble * diagonal, longDouble * work, int * rowsDropped)
{
  longDouble largest=doubleParameters_[3];
  longDouble smallest=doubleParameters_[4];
  double dropValue = doubleParameters_[10];
  int firstPositive=integerParameters_[34];
  int rowOffset=diagonal-diagonal_;
  int numberDropped=0;
  int i, j, k;
  longDouble t00,temp1;
  longDouble * aa;
  aa = a-BLOCK;
  for (j = 0; j < n; j ++) {
    aa+=BLOCK;
    t00 = aa[j];
    for (k = 0; k < j; ++k) {
      longDouble multiplier = work[k];
      t00 -= a[j + k * BLOCK] * a[j + k * BLOCK] * multiplier;
    }
    bool dropColumn=false;
    longDouble useT00=t00;
    if (j+rowOffset<firstPositive) {
      // must be negative
      if (t00<=-dropValue) {
	smallest = CoinMin(smallest,-t00);
	largest = CoinMax(largest,-t00);
	//aa[j]=t00;
	t00 = 1.0/t00;
      } else {
	dropColumn=true;
	//aa[j]=-1.0e100;
	useT00=-1.0e-100;
	t00=0.0;
	integerParameters_[20]++;
      }
    } else {
      // must be positive
      if (t00>=dropValue) {
	smallest = CoinMin(smallest,t00);
	largest = CoinMax(largest,t00);
	//aa[j]=t00;
	t00 = 1.0/t00;
      } else {
	dropColumn=true;
	//aa[j]=1.0e100;
	useT00=1.0e-100;
	t00=0.0;
	integerParameters_[20]++;
      }
    }
    if (!dropColumn) {
      diagonal[j]=t00;
      work[j]=useT00;
      temp1 = t00;
      for (i=j+1;i<n;i++) {
        t00=aa[i];
        for (k = 0; k < j; ++k) {
	  longDouble multiplier = work[k];
          t00 -= a[i + k * BLOCK] * a[j + k * BLOCK] * multiplier;
        }
        aa[i]=t00*temp1;
      }
    } else {
      // drop column
      rowsDropped[j+rowOffset]=2;
      numberDropped++;
      diagonal[j]=0.0;
      //aa[j]=1.0e100;
      work[j]=1.0e100;
      for (i=j+1;i<n;i++) {
        aa[i]=0.0;
      }
    }
  }
  doubleParameters_[3]=largest;
  doubleParameters_[4]=smallest;
  integerParameters_[20]+=numberDropped;
}
// Leaf recursive triangle rectangle update
void 
ClpCholeskyDense::triRecLeaf(longDouble * aTri, longDouble * aUnder, longDouble * diagonal, longDouble * work,
		int nUnder)
{
#ifdef POS_DEBUG
  int iru,icu;
  int iu=bNumber(aUnder,iru,icu);
  int irt,ict;
  int it=bNumber(aTri,irt,ict);
  //printf("%d %d\n",iu,it);
  printf("trirecleaf  under (%d,%d), tri (%d,%d)\n",
	 iru,icu,irt,ict);
  assert (diagonal==diagonal_+ict*BLOCK);
#endif
  int j;
  longDouble * aa;
#if BLOCK==16
  if (nUnder==BLOCK) {
    aa = aTri-2*BLOCK;
    for (j = 0; j < BLOCK; j +=2) {
      aa+=2*BLOCK;
      longDouble temp0 = diagonal[j];
      longDouble temp1 = diagonal[j+1];
      for (int i=0;i<BLOCK;i+=2) {
        longDouble t00=aUnder[i+j*BLOCK];
        longDouble t10=aUnder[i+ BLOCK + j*BLOCK];
        longDouble t01=aUnder[i+1+j*BLOCK];
        longDouble t11=aUnder[i+1+ BLOCK + j*BLOCK];
        for (int k = 0; k < j; ++k) {
	  longDouble multiplier=work[k];
	  longDouble au0 = aUnder[i + k * BLOCK]*multiplier;
	  longDouble au1 = aUnder[i + 1 + k * BLOCK]*multiplier;
	  longDouble at0 = aTri[j + k * BLOCK];
	  longDouble at1 = aTri[j + 1 + k * BLOCK];
          t00 -= au0 * at0;
          t10 -= au0 * at1;
          t01 -= au1 * at0;
          t11 -= au1 * at1;
        }
        t00 *= temp0;
	longDouble at1 = aTri[j + 1 + j*BLOCK]*work[j];
        t10 -= t00 * at1;
        t01 *= temp0;
        t11 -= t01 * at1;
        aUnder[i+j*BLOCK]=t00;
        aUnder[i+1+j*BLOCK]=t01;
        aUnder[i+ BLOCK + j*BLOCK]=t10*temp1;
        aUnder[i+1+ BLOCK + j*BLOCK]=t11*temp1;
      }
    }
  } else {
#endif
    aa = aTri-BLOCK;
    for (j = 0; j < BLOCK; j ++) {
      aa+=BLOCK;
      longDouble temp1 = diagonal[j];
      for (int i=0;i<nUnder;i++) {
        longDouble t00=aUnder[i+j*BLOCK];
        for (int k = 0; k < j; ++k) {
	  longDouble multiplier=work[k];
          t00 -= aUnder[i + k * BLOCK] * aTri[j + k * BLOCK] * multiplier;
        }
        aUnder[i+j*BLOCK]=t00*temp1;
      }
    }
#if BLOCK==16
  }
#endif
}
// Leaf recursive rectangle triangle update
void ClpCholeskyDense::recTriLeaf(longDouble * aUnder, longDouble * aTri, 
				  longDouble * diagonal, longDouble * work, int nUnder)
{
#ifdef POS_DEBUG
  int iru,icu;
  int iu=bNumber(aUnder,iru,icu);
  int irt,ict;
  int it=bNumber(aTri,irt,ict);
  //printf("%d %d\n",iu,it);
  printf("rectrileaf  under (%d,%d), tri (%d,%d)\n",
	 iru,icu,irt,ict);
  assert (diagonal==diagonal_+icu*BLOCK);
#endif
  int i, j, k;
  longDouble t00;
  longDouble * aa;
#if BLOCK==16
  if (nUnder==BLOCK) {
    longDouble * aUnder2 = aUnder-2;
    aa = aTri-2*BLOCK;
    for (j = 0; j < BLOCK; j +=2) {
      longDouble t00,t01,t10,t11;
      aa+=2*BLOCK;
      aUnder2+=2;
      t00=aa[j];
      t01=aa[j+1];
      t10=aa[j+1+BLOCK];
      for (k = 0; k < BLOCK; ++k) {
	longDouble multiplier = work[k];
	longDouble a0 = aUnder2[k * BLOCK];
	longDouble a1 = aUnder2[1 + k * BLOCK];
	longDouble x0 = a0 * multiplier;
	longDouble x1 = a1 * multiplier;
        t00 -= a0 * x0;
        t01 -= a1 * x0;
        t10 -= a1 * x1;
      }
      aa[j]=t00;
      aa[j+1]=t01;
      aa[j+1+BLOCK]=t10;
      for (i=j+2;i< BLOCK;i+=2) {
        t00=aa[i];
        t01=aa[i+BLOCK];
        t10=aa[i+1];
        t11=aa[i+1+BLOCK];
        for (k = 0; k < BLOCK; ++k) {
	  longDouble multiplier = work[k];
	  longDouble a0 = aUnder2[k * BLOCK]*multiplier;
	  longDouble a1 = aUnder2[1 + k * BLOCK]*multiplier;
          t00 -= aUnder[i + k * BLOCK] * a0;
          t01 -= aUnder[i + k * BLOCK] * a1;
          t10 -= aUnder[i + 1 + k * BLOCK] * a0;
          t11 -= aUnder[i + 1 + k * BLOCK] * a1;
        }
        aa[i]=t00;
        aa[i+BLOCK]=t01;
        aa[i+1]=t10;
        aa[i+1+BLOCK]=t11;
      }
    }
  } else {
#endif
    aa = aTri-BLOCK;
    for (j = 0; j < nUnder; j ++) {
      aa+=BLOCK;
      for (i=j;i< nUnder;i++) {
        t00=aa[i];
        for (k = 0; k < BLOCK; ++k) {
	  longDouble multiplier = work[k];
          t00 -= aUnder[i + k * BLOCK] * aUnder[j + k * BLOCK]*multiplier;
        }
        aa[i]=t00;
      }
    }
#if BLOCK==16
  }
#endif
}
/* Leaf recursive rectangle rectangle update,
   nUnder is number of rows in iBlock,
   nUnderK is number of rows in kBlock
*/
void 
ClpCholeskyDense::recRecLeaf(longDouble * above, 
			     longDouble * aUnder, longDouble *aOther, longDouble * diagonal, longDouble * work,
			     int nUnder)
{
#ifdef POS_DEBUG
  int ira,ica;
  int ia=bNumber(above,ira,ica);
  int iru,icu;
  int iu=bNumber(aUnder,iru,icu);
  int iro,ico;
  int io=bNumber(aOther,iro,ico);
  //printf("%d %d %d\n",ia,iu,io);
  printf("recrecleaf above (%d,%d), under (%d,%d), other (%d,%d)\n",
	 ira,ica,iru,icu,iro,ico);
  assert (diagonal==diagonal_+ica*BLOCK);
#endif
  int i, j, k;
  longDouble * aa;
#if BLOCK==16
  aa = aOther-4*BLOCK;
  if (nUnder==BLOCK) {
    //#define INTEL
#ifdef INTEL
    aa+=2*BLOCK;
    for (j = 0; j < BLOCK; j +=2) {
      aa+=2*BLOCK;
      for (i=0;i< BLOCK;i+=2) {
	longDouble t00=aa[i+0*BLOCK];
	longDouble t10=aa[i+1*BLOCK];
	longDouble t01=aa[i+1+0*BLOCK];
	longDouble t11=aa[i+1+1*BLOCK];
	for (k=0;k<BLOCK;k++) {
	  longDouble multiplier = work[k];
          longDouble a00=aUnder[i+k*BLOCK]*multiplier;
          longDouble a01=aUnder[i+1+k*BLOCK]*multiplier;
          t00 -= a00 * above[j + 0 + k * BLOCK];
          t10 -= a00 * above[j + 1 + k * BLOCK];
          t01 -= a01 * above[j + 0 + k * BLOCK];
          t11 -= a01 * above[j + 1 + k * BLOCK];
	}
	aa[i+0*BLOCK]=t00;
	aa[i+1*BLOCK]=t10;
	aa[i+1+0*BLOCK]=t01;
	aa[i+1+1*BLOCK]=t11;
      }
    }
#else
    for (j = 0; j < BLOCK; j +=4) {
      aa+=4*BLOCK;
      for (i=0;i< BLOCK;i+=4) {
	longDouble t00=aa[i+0+0*BLOCK];
	longDouble t10=aa[i+0+1*BLOCK];
	longDouble t20=aa[i+0+2*BLOCK];
	longDouble t30=aa[i+0+3*BLOCK];
	longDouble t01=aa[i+1+0*BLOCK];
	longDouble t11=aa[i+1+1*BLOCK];
	longDouble t21=aa[i+1+2*BLOCK];
	longDouble t31=aa[i+1+3*BLOCK];
	longDouble t02=aa[i+2+0*BLOCK];
	longDouble t12=aa[i+2+1*BLOCK];
	longDouble t22=aa[i+2+2*BLOCK];
	longDouble t32=aa[i+2+3*BLOCK];
	longDouble t03=aa[i+3+0*BLOCK];
	longDouble t13=aa[i+3+1*BLOCK];
	longDouble t23=aa[i+3+2*BLOCK];
	longDouble t33=aa[i+3+3*BLOCK];
	for (k=0;k<BLOCK;k++) {
	  longDouble multiplier = work[k];
          longDouble a00=aUnder[i+0+k*BLOCK]*multiplier;
          longDouble a01=aUnder[i+1+k*BLOCK]*multiplier;
          longDouble a02=aUnder[i+2+k*BLOCK]*multiplier;
          longDouble a03=aUnder[i+3+k*BLOCK]*multiplier;
          t00 -= a00 * above[j + 0 + k * BLOCK];
          t10 -= a00 * above[j + 1 + k * BLOCK];
          t20 -= a00 * above[j + 2 + k * BLOCK];
          t30 -= a00 * above[j + 3 + k * BLOCK];
          t01 -= a01 * above[j + 0 + k * BLOCK];
          t11 -= a01 * above[j + 1 + k * BLOCK];
          t21 -= a01 * above[j + 2 + k * BLOCK];
          t31 -= a01 * above[j + 3 + k * BLOCK];
          t02 -= a02 * above[j + 0 + k * BLOCK];
          t12 -= a02 * above[j + 1 + k * BLOCK];
          t22 -= a02 * above[j + 2 + k * BLOCK];
          t32 -= a02 * above[j + 3 + k * BLOCK];
          t03 -= a03 * above[j + 0 + k * BLOCK];
          t13 -= a03 * above[j + 1 + k * BLOCK];
          t23 -= a03 * above[j + 2 + k * BLOCK];
          t33 -= a03 * above[j + 3 + k * BLOCK];
	}
	aa[i+0+0*BLOCK]=t00;
	aa[i+0+1*BLOCK]=t10;
	aa[i+0+2*BLOCK]=t20;
	aa[i+0+3*BLOCK]=t30;
	aa[i+1+0*BLOCK]=t01;
	aa[i+1+1*BLOCK]=t11;
	aa[i+1+2*BLOCK]=t21;
	aa[i+1+3*BLOCK]=t31;
	aa[i+2+0*BLOCK]=t02;
	aa[i+2+1*BLOCK]=t12;
	aa[i+2+2*BLOCK]=t22;
	aa[i+2+3*BLOCK]=t32;
	aa[i+3+0*BLOCK]=t03;
	aa[i+3+1*BLOCK]=t13;
	aa[i+3+2*BLOCK]=t23;
	aa[i+3+3*BLOCK]=t33;
      }
    }
#endif
  } else {
    int odd=nUnder&1;
    int n=nUnder-odd;
    for (j = 0; j < BLOCK; j +=4) {
      aa+=4*BLOCK;
      for (i=0;i< n;i+=2) {
	longDouble t00=aa[i+0*BLOCK];
	longDouble t10=aa[i+1*BLOCK];
	longDouble t20=aa[i+2*BLOCK];
	longDouble t30=aa[i+3*BLOCK];
	longDouble t01=aa[i+1+0*BLOCK];
	longDouble t11=aa[i+1+1*BLOCK];
	longDouble t21=aa[i+1+2*BLOCK];
	longDouble t31=aa[i+1+3*BLOCK];
	for (k=0;k<BLOCK;k++) {
	  longDouble multiplier = work[k];
          longDouble a00=aUnder[i+k*BLOCK]*multiplier;
          longDouble a01=aUnder[i+1+k*BLOCK]*multiplier;
          t00 -= a00 * above[j + 0 + k * BLOCK];
          t10 -= a00 * above[j + 1 + k * BLOCK];
          t20 -= a00 * above[j + 2 + k * BLOCK];
          t30 -= a00 * above[j + 3 + k * BLOCK];
          t01 -= a01 * above[j + 0 + k * BLOCK];
          t11 -= a01 * above[j + 1 + k * BLOCK];
          t21 -= a01 * above[j + 2 + k * BLOCK];
          t31 -= a01 * above[j + 3 + k * BLOCK];
	}
	aa[i+0*BLOCK]=t00;
	aa[i+1*BLOCK]=t10;
	aa[i+2*BLOCK]=t20;
	aa[i+3*BLOCK]=t30;
	aa[i+1+0*BLOCK]=t01;
	aa[i+1+1*BLOCK]=t11;
	aa[i+1+2*BLOCK]=t21;
	aa[i+1+3*BLOCK]=t31;
      }
      if (odd) {
	longDouble t0=aa[n+0*BLOCK];
	longDouble t1=aa[n+1*BLOCK];
	longDouble t2=aa[n+2*BLOCK];
	longDouble t3=aa[n+3*BLOCK];
	longDouble a0;
	for (k=0;k<BLOCK;k++) {
          a0=aUnder[n+k*BLOCK]*work[k];
          t0 -= a0 * above[j + 0 + k * BLOCK];
          t1 -= a0 * above[j + 1 + k * BLOCK];
          t2 -= a0 * above[j + 2 + k * BLOCK];
          t3 -= a0 * above[j + 3 + k * BLOCK];
	}
	aa[n+0*BLOCK]=t0;
	aa[n+1*BLOCK]=t1;
	aa[n+2*BLOCK]=t2;
	aa[n+3*BLOCK]=t3;
      }
    }
  }
#else
  aa = aOther-BLOCK;
  for (j = 0; j < BLOCK; j ++) {
    aa+=BLOCK;
    for (i=0;i< nUnder;i++) {
      longDouble t00=aa[i+0*BLOCK];
      for (k=0;k<BLOCK;k++) {
	longDouble a00=aUnder[i+k*BLOCK]*work[k];
	t00 -= a00 * above[j + k * BLOCK];
      }
      aa[i]=t00;
    }
  }
#endif
}
/* Uses factorization to solve. */
void 
ClpCholeskyDense::solve (double * region) 
{
#ifdef CHOL_COMPARE
  double * region2=NULL;
  if (numberRows_<200) {
    longDouble * xx = sparseFactor_;
    longDouble * yy = diagonal_;
    diagonal_ = sparseFactor_ + 40000;
    sparseFactor_=diagonal_ + numberRows_;
    region2 = ClpCopyOfArray(region,numberRows_);
    int iRow,iColumn;
    int addOffset=numberRows_-1;
    longDouble * work = sparseFactor_-1;
    for (iColumn=0;iColumn<numberRows_;iColumn++) {
      double value = region2[iColumn];
      for (iRow=iColumn+1;iRow<numberRows_;iRow++)
	region2[iRow] -= value*work[iRow];
      addOffset--;
      work += addOffset;
    }
    for (iColumn=numberRows_-1;iColumn>=0;iColumn--) {
      double value = region2[iColumn]*diagonal_[iColumn];
      work -= addOffset;
      addOffset++;
      for (iRow=iColumn+1;iRow<numberRows_;iRow++)
	value -= region2[iRow]*work[iRow];
      region2[iColumn]=value;
    }
    sparseFactor_=xx;
    diagonal_=yy;
  }
#endif
  //longDouble * xx = sparseFactor_;
  //longDouble * yy = diagonal_;
  //diagonal_ = sparseFactor_ + 40000;
  //sparseFactor_=diagonal_ + numberRows_;
  int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
  // later align on boundary
  longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
  int iBlock;
  longDouble * aa = a;
  int iColumn;
  for (iBlock=0;iBlock<numberBlocks;iBlock++) {
    int nChunk;
    int jBlock;
    int iDo = iBlock*BLOCK;
    int base=iDo;
    if (iDo+BLOCK>numberRows_) {
      nChunk=numberRows_-iDo;
    } else {
      nChunk=BLOCK;
    }
    solveF1(aa,nChunk,region+iDo);
    for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
      base+=BLOCK;
      aa+=BLOCKSQ;
      if (base+BLOCK>numberRows_) {
	nChunk=numberRows_-base;
      } else {
	nChunk=BLOCK;
      }
      solveF2(aa,nChunk,region+iDo,region+base);
    }
    aa+=BLOCKSQ;
  }
  // do diagonal outside
  for (iColumn=0;iColumn<numberRows_;iColumn++) 
    region[iColumn] *= diagonal_[iColumn];
  int offset=((numberBlocks*(numberBlocks+1))>>1);
  aa = a+number_entries(offset-1);
  int lBase=(numberBlocks-1)*BLOCK;
  for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) {
    int nChunk;
    int jBlock;
    int triBase=iBlock*BLOCK;
    int iBase=lBase;
    for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
      if (iBase+BLOCK>numberRows_) {
	nChunk=numberRows_-iBase;
      } else {
	nChunk=BLOCK;
      }
      solveB2(aa,nChunk,region+triBase,region+iBase);
      iBase-=BLOCK;
      aa-=BLOCKSQ;
    }
    if (triBase+BLOCK>numberRows_) {
      nChunk=numberRows_-triBase;
    } else {
      nChunk=BLOCK;
    }
    solveB1(aa,nChunk,region+triBase);
    aa-=BLOCKSQ;
  }
#ifdef CHOL_COMPARE
  if (numberRows_<200) {
    for (int i=0;i<numberRows_;i++) {
      assert(fabs(region[i]-region2[i])<1.0e-3);
    }
    delete [] region2;
  }
#endif
}
// Forward part of solve 1
void 
ClpCholeskyDense::solveF1(longDouble * a,int n,double * region)
{
  int j, k;
  longDouble t00;
  for (j = 0; j < n; j ++) {
    t00 = region[j];
    for (k = 0; k < j; ++k) {
      t00 -= region[k] * a[j + k * BLOCK];
    }
    //t00*=a[j + j * BLOCK];
    region[j] = t00;
  }
}
// Forward part of solve 2
void 
ClpCholeskyDense::solveF2(longDouble * a,int n,double * region, double * region2)
{
  int j, k;
#if BLOCK==16
  if (n==BLOCK) {
    for (k = 0; k < BLOCK; k+=4) {
      longDouble t0 = region2[0];
      longDouble t1 = region2[1];
      longDouble t2 = region2[2];
      longDouble t3 = region2[3];
      t0 -= region[0] * a[0 + 0 * BLOCK];
      t1 -= region[0] * a[1 + 0 * BLOCK];
      t2 -= region[0] * a[2 + 0 * BLOCK];
      t3 -= region[0] * a[3 + 0 * BLOCK];

      t0 -= region[1] * a[0 + 1 * BLOCK];
      t1 -= region[1] * a[1 + 1 * BLOCK];
      t2 -= region[1] * a[2 + 1 * BLOCK];
      t3 -= region[1] * a[3 + 1 * BLOCK];

      t0 -= region[2] * a[0 + 2 * BLOCK];
      t1 -= region[2] * a[1 + 2 * BLOCK];
      t2 -= region[2] * a[2 + 2 * BLOCK];
      t3 -= region[2] * a[3 + 2 * BLOCK];

      t0 -= region[3] * a[0 + 3 * BLOCK];
      t1 -= region[3] * a[1 + 3 * BLOCK];
      t2 -= region[3] * a[2 + 3 * BLOCK];
      t3 -= region[3] * a[3 + 3 * BLOCK];

      t0 -= region[4] * a[0 + 4 * BLOCK];
      t1 -= region[4] * a[1 + 4 * BLOCK];
      t2 -= region[4] * a[2 + 4 * BLOCK];
      t3 -= region[4] * a[3 + 4 * BLOCK];

      t0 -= region[5] * a[0 + 5 * BLOCK];
      t1 -= region[5] * a[1 + 5 * BLOCK];
      t2 -= region[5] * a[2 + 5 * BLOCK];
      t3 -= region[5] * a[3 + 5 * BLOCK];

      t0 -= region[6] * a[0 + 6 * BLOCK];
      t1 -= region[6] * a[1 + 6 * BLOCK];
      t2 -= region[6] * a[2 + 6 * BLOCK];
      t3 -= region[6] * a[3 + 6 * BLOCK];

      t0 -= region[7] * a[0 + 7 * BLOCK];
      t1 -= region[7] * a[1 + 7 * BLOCK];
      t2 -= region[7] * a[2 + 7 * BLOCK];
      t3 -= region[7] * a[3 + 7 * BLOCK];
#if BLOCK>8
      t0 -= region[8] * a[0 + 8 * BLOCK];
      t1 -= region[8] * a[1 + 8 * BLOCK];
      t2 -= region[8] * a[2 + 8 * BLOCK];
      t3 -= region[8] * a[3 + 8 * BLOCK];

      t0 -= region[9] * a[0 + 9 * BLOCK];
      t1 -= region[9] * a[1 + 9 * BLOCK];
      t2 -= region[9] * a[2 + 9 * BLOCK];
      t3 -= region[9] * a[3 + 9 * BLOCK];

      t0 -= region[10] * a[0 + 10 * BLOCK];
      t1 -= region[10] * a[1 + 10 * BLOCK];
      t2 -= region[10] * a[2 + 10 * BLOCK];
      t3 -= region[10] * a[3 + 10 * BLOCK];

      t0 -= region[11] * a[0 + 11 * BLOCK];
      t1 -= region[11] * a[1 + 11 * BLOCK];
      t2 -= region[11] * a[2 + 11 * BLOCK];
      t3 -= region[11] * a[3 + 11 * BLOCK];

      t0 -= region[12] * a[0 + 12 * BLOCK];
      t1 -= region[12] * a[1 + 12 * BLOCK];
      t2 -= region[12] * a[2 + 12 * BLOCK];
      t3 -= region[12] * a[3 + 12 * BLOCK];

      t0 -= region[13] * a[0 + 13 * BLOCK];
      t1 -= region[13] * a[1 + 13 * BLOCK];
      t2 -= region[13] * a[2 + 13 * BLOCK];
      t3 -= region[13] * a[3 + 13 * BLOCK];

      t0 -= region[14] * a[0 + 14 * BLOCK];
      t1 -= region[14] * a[1 + 14 * BLOCK];
      t2 -= region[14] * a[2 + 14 * BLOCK];
      t3 -= region[14] * a[3 + 14 * BLOCK];

      t0 -= region[15] * a[0 + 15 * BLOCK];
      t1 -= region[15] * a[1 + 15 * BLOCK];
      t2 -= region[15] * a[2 + 15 * BLOCK];
      t3 -= region[15] * a[3 + 15 * BLOCK];
#endif
      region2[0] = t0;
      region2[1] = t1;
      region2[2] = t2;
      region2[3] = t3;
      region2+=4;
      a+=4;
    }
  } else {
#endif
    for (k = 0; k < n; ++k) {
      longDouble t00 = region2[k];
      for (j = 0; j < BLOCK; j ++) {
        t00 -= region[j] * a[k + j * BLOCK];
      }
      region2[k] = t00;
    }
#if BLOCK==16
  }
#endif
}
// Backward part of solve 1
void 
ClpCholeskyDense::solveB1(longDouble * a,int n,double * region)
{
  int j, k;
  longDouble t00;
  for (j = n-1; j >=0; j --) {
    t00 = region[j];
    for (k = j+1; k < n; ++k) {
      t00 -= region[k] * a[k + j * BLOCK];
    }
    //t00*=a[j + j * BLOCK];
    region[j] = t00;
  }
}
// Backward part of solve 2
void 
ClpCholeskyDense::solveB2(longDouble * a,int n,double * region, double * region2)
{
  int j, k;
#if BLOCK==16
  if (n==BLOCK) {
    for (j = 0; j < BLOCK; j +=4) {
      longDouble t0 = region[0];
      longDouble t1 = region[1];
      longDouble t2 = region[2];
      longDouble t3 = region[3];
      t0 -= region2[0] * a[0 + 0*BLOCK];
      t1 -= region2[0] * a[0 + 1*BLOCK];
      t2 -= region2[0] * a[0 + 2*BLOCK];
      t3 -= region2[0] * a[0 + 3*BLOCK];

      t0 -= region2[1] * a[1 + 0*BLOCK];
      t1 -= region2[1] * a[1 + 1*BLOCK];
      t2 -= region2[1] * a[1 + 2*BLOCK];
      t3 -= region2[1] * a[1 + 3*BLOCK];

      t0 -= region2[2] * a[2 + 0*BLOCK];
      t1 -= region2[2] * a[2 + 1*BLOCK];
      t2 -= region2[2] * a[2 + 2*BLOCK];
      t3 -= region2[2] * a[2 + 3*BLOCK];

      t0 -= region2[3] * a[3 + 0*BLOCK];
      t1 -= region2[3] * a[3 + 1*BLOCK];
      t2 -= region2[3] * a[3 + 2*BLOCK];
      t3 -= region2[3] * a[3 + 3*BLOCK];

      t0 -= region2[4] * a[4 + 0*BLOCK];
      t1 -= region2[4] * a[4 + 1*BLOCK];
      t2 -= region2[4] * a[4 + 2*BLOCK];
      t3 -= region2[4] * a[4 + 3*BLOCK];

      t0 -= region2[5] * a[5 + 0*BLOCK];
      t1 -= region2[5] * a[5 + 1*BLOCK];
      t2 -= region2[5] * a[5 + 2*BLOCK];
      t3 -= region2[5] * a[5 + 3*BLOCK];

      t0 -= region2[6] * a[6 + 0*BLOCK];
      t1 -= region2[6] * a[6 + 1*BLOCK];
      t2 -= region2[6] * a[6 + 2*BLOCK];
      t3 -= region2[6] * a[6 + 3*BLOCK];

      t0 -= region2[7] * a[7 + 0*BLOCK];
      t1 -= region2[7] * a[7 + 1*BLOCK];
      t2 -= region2[7] * a[7 + 2*BLOCK];
      t3 -= region2[7] * a[7 + 3*BLOCK];
#if BLOCK>8

      t0 -= region2[8] * a[8 + 0*BLOCK];
      t1 -= region2[8] * a[8 + 1*BLOCK];
      t2 -= region2[8] * a[8 + 2*BLOCK];
      t3 -= region2[8] * a[8 + 3*BLOCK];

      t0 -= region2[9] * a[9 + 0*BLOCK];
      t1 -= region2[9] * a[9 + 1*BLOCK];
      t2 -= region2[9] * a[9 + 2*BLOCK];
      t3 -= region2[9] * a[9 + 3*BLOCK];

      t0 -= region2[10] * a[10 + 0*BLOCK];
      t1 -= region2[10] * a[10 + 1*BLOCK];
      t2 -= region2[10] * a[10 + 2*BLOCK];
      t3 -= region2[10] * a[10 + 3*BLOCK];

      t0 -= region2[11] * a[11 + 0*BLOCK];
      t1 -= region2[11] * a[11 + 1*BLOCK];
      t2 -= region2[11] * a[11 + 2*BLOCK];
      t3 -= region2[11] * a[11 + 3*BLOCK];

      t0 -= region2[12] * a[12 + 0*BLOCK];
      t1 -= region2[12] * a[12 + 1*BLOCK];
      t2 -= region2[12] * a[12 + 2*BLOCK];
      t3 -= region2[12] * a[12 + 3*BLOCK];

      t0 -= region2[13] * a[13 + 0*BLOCK];
      t1 -= region2[13] * a[13 + 1*BLOCK];
      t2 -= region2[13] * a[13 + 2*BLOCK];
      t3 -= region2[13] * a[13 + 3*BLOCK];

      t0 -= region2[14] * a[14 + 0*BLOCK];
      t1 -= region2[14] * a[14 + 1*BLOCK];
      t2 -= region2[14] * a[14 + 2*BLOCK];
      t3 -= region2[14] * a[14 + 3*BLOCK];

      t0 -= region2[15] * a[15 + 0*BLOCK];
      t1 -= region2[15] * a[15 + 1*BLOCK];
      t2 -= region2[15] * a[15 + 2*BLOCK];
      t3 -= region2[15] * a[15 + 3*BLOCK];
#endif
      region[0] = t0;
      region[1] = t1;
      region[2] = t2;
      region[3] = t3;
      a+=4*BLOCK;
      region+=4;
    }
  } else {
#endif
    for (j = 0; j < BLOCK; j ++) {
      longDouble t00 = region[j];
      for (k = 0; k < n; ++k) {
        t00 -= region2[k] * a[k + j * BLOCK];
      }
      region[j] = t00;
    }
#if BLOCK==16
  }
#endif
}
/* Uses factorization to solve. */
void 
ClpCholeskyDense::solveLong (longDouble * region) 
{
  int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
  // later align on boundary
  longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
  int iBlock;
  longDouble * aa = a;
  int iColumn;
  for (iBlock=0;iBlock<numberBlocks;iBlock++) {
    int nChunk;
    int jBlock;
    int iDo = iBlock*BLOCK;
    int base=iDo;
    if (iDo+BLOCK>numberRows_) {
      nChunk=numberRows_-iDo;
    } else {
      nChunk=BLOCK;
    }
    solveF1Long(aa,nChunk,region+iDo);
    for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
      base+=BLOCK;
      aa+=BLOCKSQ;
      if (base+BLOCK>numberRows_) {
	nChunk=numberRows_-base;
      } else {
	nChunk=BLOCK;
      }
      solveF2Long(aa,nChunk,region+iDo,region+base);
    }
    aa+=BLOCKSQ;
  }
  // do diagonal outside
  for (iColumn=0;iColumn<numberRows_;iColumn++) 
    region[iColumn] *= diagonal_[iColumn];
  int offset=((numberBlocks*(numberBlocks+1))>>1);
  aa = a+number_entries(offset-1);
  int lBase=(numberBlocks-1)*BLOCK;
  for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) {
    int nChunk;
    int jBlock;
    int triBase=iBlock*BLOCK;
    int iBase=lBase;
    for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
      if (iBase+BLOCK>numberRows_) {
	nChunk=numberRows_-iBase;
      } else {
	nChunk=BLOCK;
      }
      solveB2Long(aa,nChunk,region+triBase,region+iBase);
      iBase-=BLOCK;
      aa-=BLOCKSQ;
    }
    if (triBase+BLOCK>numberRows_) {
      nChunk=numberRows_-triBase;
    } else {
      nChunk=BLOCK;
    }
    solveB1Long(aa,nChunk,region+triBase);
    aa-=BLOCKSQ;
  }
}
// Forward part of solve 1
void 
ClpCholeskyDense::solveF1Long(longDouble * a,int n,longDouble * region)
{
  int j, k;
  longDouble t00;
  for (j = 0; j < n; j ++) {
    t00 = region[j];
    for (k = 0; k < j; ++k) {
      t00 -= region[k] * a[j + k * BLOCK];
    }
    //t00*=a[j + j * BLOCK];
    region[j] = t00;
  }
}
// Forward part of solve 2
void 
ClpCholeskyDense::solveF2Long(longDouble * a,int n,longDouble * region, longDouble * region2)
{
  int j, k;
#if BLOCK==16
  if (n==BLOCK) {
    for (k = 0; k < BLOCK; k+=4) {
      longDouble t0 = region2[0];
      longDouble t1 = region2[1];
      longDouble t2 = region2[2];
      longDouble t3 = region2[3];
      t0 -= region[0] * a[0 + 0 * BLOCK];
      t1 -= region[0] * a[1 + 0 * BLOCK];
      t2 -= region[0] * a[2 + 0 * BLOCK];
      t3 -= region[0] * a[3 + 0 * BLOCK];

      t0 -= region[1] * a[0 + 1 * BLOCK];
      t1 -= region[1] * a[1 + 1 * BLOCK];
      t2 -= region[1] * a[2 + 1 * BLOCK];
      t3 -= region[1] * a[3 + 1 * BLOCK];

      t0 -= region[2] * a[0 + 2 * BLOCK];
      t1 -= region[2] * a[1 + 2 * BLOCK];
      t2 -= region[2] * a[2 + 2 * BLOCK];
      t3 -= region[2] * a[3 + 2 * BLOCK];

      t0 -= region[3] * a[0 + 3 * BLOCK];
      t1 -= region[3] * a[1 + 3 * BLOCK];
      t2 -= region[3] * a[2 + 3 * BLOCK];
      t3 -= region[3] * a[3 + 3 * BLOCK];

      t0 -= region[4] * a[0 + 4 * BLOCK];
      t1 -= region[4] * a[1 + 4 * BLOCK];
      t2 -= region[4] * a[2 + 4 * BLOCK];
      t3 -= region[4] * a[3 + 4 * BLOCK];

      t0 -= region[5] * a[0 + 5 * BLOCK];
      t1 -= region[5] * a[1 + 5 * BLOCK];
      t2 -= region[5] * a[2 + 5 * BLOCK];
      t3 -= region[5] * a[3 + 5 * BLOCK];

      t0 -= region[6] * a[0 + 6 * BLOCK];
      t1 -= region[6] * a[1 + 6 * BLOCK];
      t2 -= region[6] * a[2 + 6 * BLOCK];
      t3 -= region[6] * a[3 + 6 * BLOCK];

      t0 -= region[7] * a[0 + 7 * BLOCK];
      t1 -= region[7] * a[1 + 7 * BLOCK];
      t2 -= region[7] * a[2 + 7 * BLOCK];
      t3 -= region[7] * a[3 + 7 * BLOCK];
#if BLOCK>8
      t0 -= region[8] * a[0 + 8 * BLOCK];
      t1 -= region[8] * a[1 + 8 * BLOCK];
      t2 -= region[8] * a[2 + 8 * BLOCK];
      t3 -= region[8] * a[3 + 8 * BLOCK];

      t0 -= region[9] * a[0 + 9 * BLOCK];
      t1 -= region[9] * a[1 + 9 * BLOCK];
      t2 -= region[9] * a[2 + 9 * BLOCK];
      t3 -= region[9] * a[3 + 9 * BLOCK];

      t0 -= region[10] * a[0 + 10 * BLOCK];
      t1 -= region[10] * a[1 + 10 * BLOCK];
      t2 -= region[10] * a[2 + 10 * BLOCK];
      t3 -= region[10] * a[3 + 10 * BLOCK];

      t0 -= region[11] * a[0 + 11 * BLOCK];
      t1 -= region[11] * a[1 + 11 * BLOCK];
      t2 -= region[11] * a[2 + 11 * BLOCK];
      t3 -= region[11] * a[3 + 11 * BLOCK];

      t0 -= region[12] * a[0 + 12 * BLOCK];
      t1 -= region[12] * a[1 + 12 * BLOCK];
      t2 -= region[12] * a[2 + 12 * BLOCK];
      t3 -= region[12] * a[3 + 12 * BLOCK];

      t0 -= region[13] * a[0 + 13 * BLOCK];
      t1 -= region[13] * a[1 + 13 * BLOCK];
      t2 -= region[13] * a[2 + 13 * BLOCK];
      t3 -= region[13] * a[3 + 13 * BLOCK];

      t0 -= region[14] * a[0 + 14 * BLOCK];
      t1 -= region[14] * a[1 + 14 * BLOCK];
      t2 -= region[14] * a[2 + 14 * BLOCK];
      t3 -= region[14] * a[3 + 14 * BLOCK];

      t0 -= region[15] * a[0 + 15 * BLOCK];
      t1 -= region[15] * a[1 + 15 * BLOCK];
      t2 -= region[15] * a[2 + 15 * BLOCK];
      t3 -= region[15] * a[3 + 15 * BLOCK];
#endif
      region2[0] = t0;
      region2[1] = t1;
      region2[2] = t2;
      region2[3] = t3;
      region2+=4;
      a+=4;
    }
  } else {
#endif
    for (k = 0; k < n; ++k) {
      longDouble t00 = region2[k];
      for (j = 0; j < BLOCK; j ++) {
        t00 -= region[j] * a[k + j * BLOCK];
      }
      region2[k] = t00;
    }
#if BLOCK==16
  }
#endif
}
// Backward part of solve 1
void 
ClpCholeskyDense::solveB1Long(longDouble * a,int n,longDouble * region)
{
  int j, k;
  longDouble t00;
  for (j = n-1; j >=0; j --) {
    t00 = region[j];
    for (k = j+1; k < n; ++k) {
      t00 -= region[k] * a[k + j * BLOCK];
    }
    //t00*=a[j + j * BLOCK];
    region[j] = t00;
  }
}
// Backward part of solve 2
void 
ClpCholeskyDense::solveB2Long(longDouble * a,int n,longDouble * region, longDouble * region2)
{
  int j, k;
#if BLOCK==16
  if (n==BLOCK) {
    for (j = 0; j < BLOCK; j +=4) {
      longDouble t0 = region[0];
      longDouble t1 = region[1];
      longDouble t2 = region[2];
      longDouble t3 = region[3];
      t0 -= region2[0] * a[0 + 0*BLOCK];
      t1 -= region2[0] * a[0 + 1*BLOCK];
      t2 -= region2[0] * a[0 + 2*BLOCK];
      t3 -= region2[0] * a[0 + 3*BLOCK];

      t0 -= region2[1] * a[1 + 0*BLOCK];
      t1 -= region2[1] * a[1 + 1*BLOCK];
      t2 -= region2[1] * a[1 + 2*BLOCK];
      t3 -= region2[1] * a[1 + 3*BLOCK];

      t0 -= region2[2] * a[2 + 0*BLOCK];
      t1 -= region2[2] * a[2 + 1*BLOCK];
      t2 -= region2[2] * a[2 + 2*BLOCK];
      t3 -= region2[2] * a[2 + 3*BLOCK];

      t0 -= region2[3] * a[3 + 0*BLOCK];
      t1 -= region2[3] * a[3 + 1*BLOCK];
      t2 -= region2[3] * a[3 + 2*BLOCK];
      t3 -= region2[3] * a[3 + 3*BLOCK];

      t0 -= region2[4] * a[4 + 0*BLOCK];
      t1 -= region2[4] * a[4 + 1*BLOCK];
      t2 -= region2[4] * a[4 + 2*BLOCK];
      t3 -= region2[4] * a[4 + 3*BLOCK];

      t0 -= region2[5] * a[5 + 0*BLOCK];
      t1 -= region2[5] * a[5 + 1*BLOCK];
      t2 -= region2[5] * a[5 + 2*BLOCK];
      t3 -= region2[5] * a[5 + 3*BLOCK];

      t0 -= region2[6] * a[6 + 0*BLOCK];
      t1 -= region2[6] * a[6 + 1*BLOCK];
      t2 -= region2[6] * a[6 + 2*BLOCK];
      t3 -= region2[6] * a[6 + 3*BLOCK];

      t0 -= region2[7] * a[7 + 0*BLOCK];
      t1 -= region2[7] * a[7 + 1*BLOCK];
      t2 -= region2[7] * a[7 + 2*BLOCK];
      t3 -= region2[7] * a[7 + 3*BLOCK];
#if BLOCK>8

      t0 -= region2[8] * a[8 + 0*BLOCK];
      t1 -= region2[8] * a[8 + 1*BLOCK];
      t2 -= region2[8] * a[8 + 2*BLOCK];
      t3 -= region2[8] * a[8 + 3*BLOCK];

      t0 -= region2[9] * a[9 + 0*BLOCK];
      t1 -= region2[9] * a[9 + 1*BLOCK];
      t2 -= region2[9] * a[9 + 2*BLOCK];
      t3 -= region2[9] * a[9 + 3*BLOCK];

      t0 -= region2[10] * a[10 + 0*BLOCK];
      t1 -= region2[10] * a[10 + 1*BLOCK];
      t2 -= region2[10] * a[10 + 2*BLOCK];
      t3 -= region2[10] * a[10 + 3*BLOCK];

      t0 -= region2[11] * a[11 + 0*BLOCK];
      t1 -= region2[11] * a[11 + 1*BLOCK];
      t2 -= region2[11] * a[11 + 2*BLOCK];
      t3 -= region2[11] * a[11 + 3*BLOCK];

      t0 -= region2[12] * a[12 + 0*BLOCK];
      t1 -= region2[12] * a[12 + 1*BLOCK];
      t2 -= region2[12] * a[12 + 2*BLOCK];
      t3 -= region2[12] * a[12 + 3*BLOCK];

      t0 -= region2[13] * a[13 + 0*BLOCK];
      t1 -= region2[13] * a[13 + 1*BLOCK];
      t2 -= region2[13] * a[13 + 2*BLOCK];
      t3 -= region2[13] * a[13 + 3*BLOCK];

      t0 -= region2[14] * a[14 + 0*BLOCK];
      t1 -= region2[14] * a[14 + 1*BLOCK];
      t2 -= region2[14] * a[14 + 2*BLOCK];
      t3 -= region2[14] * a[14 + 3*BLOCK];

      t0 -= region2[15] * a[15 + 0*BLOCK];
      t1 -= region2[15] * a[15 + 1*BLOCK];
      t2 -= region2[15] * a[15 + 2*BLOCK];
      t3 -= region2[15] * a[15 + 3*BLOCK];
#endif
      region[0] = t0;
      region[1] = t1;
      region[2] = t2;
      region[3] = t3;
      a+=4*BLOCK;
      region+=4;
    }
  } else {
#endif
    for (j = 0; j < BLOCK; j ++) {
      longDouble t00 = region[j];
      for (k = 0; k < n; ++k) {
        t00 -= region2[k] * a[k + j * BLOCK];
      }
      region[j] = t00;
    }
#if BLOCK==16
  }
#endif
}
/* Uses factorization to solve. */
void 
ClpCholeskyDense::solveLongWork (longWork * region) 
{
  int numberBlocks = (numberRows_+BLOCK-1)>>BLOCKSHIFT;
  // later align on boundary
  longDouble * a = sparseFactor_+BLOCKSQ*numberBlocks;
  int iBlock;
  longDouble * aa = a;
  int iColumn;
  for (iBlock=0;iBlock<numberBlocks;iBlock++) {
    int nChunk;
    int jBlock;
    int iDo = iBlock*BLOCK;
    int base=iDo;
    if (iDo+BLOCK>numberRows_) {
      nChunk=numberRows_-iDo;
    } else {
      nChunk=BLOCK;
    }
    solveF1LongWork(aa,nChunk,region+iDo);
    for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
      base+=BLOCK;
      aa+=BLOCKSQ;
      if (base+BLOCK>numberRows_) {
	nChunk=numberRows_-base;
      } else {
	nChunk=BLOCK;
      }
      solveF2LongWork(aa,nChunk,region+iDo,region+base);
    }
    aa+=BLOCKSQ;
  }
  // do diagonal outside
  for (iColumn=0;iColumn<numberRows_;iColumn++) 
    region[iColumn] *= diagonal_[iColumn];
  int offset=((numberBlocks*(numberBlocks+1))>>1);
  aa = a+number_entries(offset-1);
  int lBase=(numberBlocks-1)*BLOCK;
  for (iBlock=numberBlocks-1;iBlock>=0;iBlock--) {
    int nChunk;
    int jBlock;
    int triBase=iBlock*BLOCK;
    int iBase=lBase;
    for (jBlock=iBlock+1;jBlock<numberBlocks;jBlock++) {
      if (iBase+BLOCK>numberRows_) {
	nChunk=numberRows_-iBase;
      } else {
	nChunk=BLOCK;
      }
      solveB2LongWork(aa,nChunk,region+triBase,region+iBase);
      iBase-=BLOCK;
      aa-=BLOCKSQ;
    }
    if (triBase+BLOCK>numberRows_) {
      nChunk=numberRows_-triBase;
    } else {
      nChunk=BLOCK;
    }
    solveB1LongWork(aa,nChunk,region+triBase);
    aa-=BLOCKSQ;
  }
}
// Forward part of solve 1
void 
ClpCholeskyDense::solveF1LongWork(longDouble * a,int n,longWork * region)
{
  int j, k;
  longWork t00;
  for (j = 0; j < n; j ++) {
    t00 = region[j];
    for (k = 0; k < j; ++k) {
      t00 -= region[k] * a[j + k * BLOCK];
    }
    //t00*=a[j + j * BLOCK];
    region[j] = t00;
  }
}
// Forward part of solve 2
void 
ClpCholeskyDense::solveF2LongWork(longDouble * a,int n,longWork * region, longWork * region2)
{
  int j, k;
#if BLOCK==16
  if (n==BLOCK) {
    for (k = 0; k < BLOCK; k+=4) {
      longWork t0 = region2[0];
      longWork t1 = region2[1];
      longWork t2 = region2[2];
      longWork t3 = region2[3];
      t0 -= region[0] * a[0 + 0 * BLOCK];
      t1 -= region[0] * a[1 + 0 * BLOCK];
      t2 -= region[0] * a[2 + 0 * BLOCK];
      t3 -= region[0] * a[3 + 0 * BLOCK];

      t0 -= region[1] * a[0 + 1 * BLOCK];
      t1 -= region[1] * a[1 + 1 * BLOCK];
      t2 -= region[1] * a[2 + 1 * BLOCK];
      t3 -= region[1] * a[3 + 1 * BLOCK];

      t0 -= region[2] * a[0 + 2 * BLOCK];
      t1 -= region[2] * a[1 + 2 * BLOCK];
      t2 -= region[2] * a[2 + 2 * BLOCK];
      t3 -= region[2] * a[3 + 2 * BLOCK];

      t0 -= region[3] * a[0 + 3 * BLOCK];
      t1 -= region[3] * a[1 + 3 * BLOCK];
      t2 -= region[3] * a[2 + 3 * BLOCK];
      t3 -= region[3] * a[3 + 3 * BLOCK];

      t0 -= region[4] * a[0 + 4 * BLOCK];
      t1 -= region[4] * a[1 + 4 * BLOCK];
      t2 -= region[4] * a[2 + 4 * BLOCK];
      t3 -= region[4] * a[3 + 4 * BLOCK];

      t0 -= region[5] * a[0 + 5 * BLOCK];
      t1 -= region[5] * a[1 + 5 * BLOCK];
      t2 -= region[5] * a[2 + 5 * BLOCK];
      t3 -= region[5] * a[3 + 5 * BLOCK];

      t0 -= region[6] * a[0 + 6 * BLOCK];
      t1 -= region[6] * a[1 + 6 * BLOCK];
      t2 -= region[6] * a[2 + 6 * BLOCK];
      t3 -= region[6] * a[3 + 6 * BLOCK];

      t0 -= region[7] * a[0 + 7 * BLOCK];
      t1 -= region[7] * a[1 + 7 * BLOCK];
      t2 -= region[7] * a[2 + 7 * BLOCK];
      t3 -= region[7] * a[3 + 7 * BLOCK];
#if BLOCK>8
      t0 -= region[8] * a[0 + 8 * BLOCK];
      t1 -= region[8] * a[1 + 8 * BLOCK];
      t2 -= region[8] * a[2 + 8 * BLOCK];
      t3 -= region[8] * a[3 + 8 * BLOCK];

      t0 -= region[9] * a[0 + 9 * BLOCK];
      t1 -= region[9] * a[1 + 9 * BLOCK];
      t2 -= region[9] * a[2 + 9 * BLOCK];
      t3 -= region[9] * a[3 + 9 * BLOCK];

      t0 -= region[10] * a[0 + 10 * BLOCK];
      t1 -= region[10] * a[1 + 10 * BLOCK];
      t2 -= region[10] * a[2 + 10 * BLOCK];
      t3 -= region[10] * a[3 + 10 * BLOCK];

      t0 -= region[11] * a[0 + 11 * BLOCK];
      t1 -= region[11] * a[1 + 11 * BLOCK];
      t2 -= region[11] * a[2 + 11 * BLOCK];
      t3 -= region[11] * a[3 + 11 * BLOCK];

      t0 -= region[12] * a[0 + 12 * BLOCK];
      t1 -= region[12] * a[1 + 12 * BLOCK];
      t2 -= region[12] * a[2 + 12 * BLOCK];
      t3 -= region[12] * a[3 + 12 * BLOCK];

      t0 -= region[13] * a[0 + 13 * BLOCK];
      t1 -= region[13] * a[1 + 13 * BLOCK];
      t2 -= region[13] * a[2 + 13 * BLOCK];
      t3 -= region[13] * a[3 + 13 * BLOCK];

      t0 -= region[14] * a[0 + 14 * BLOCK];
      t1 -= region[14] * a[1 + 14 * BLOCK];
      t2 -= region[14] * a[2 + 14 * BLOCK];
      t3 -= region[14] * a[3 + 14 * BLOCK];

      t0 -= region[15] * a[0 + 15 * BLOCK];
      t1 -= region[15] * a[1 + 15 * BLOCK];
      t2 -= region[15] * a[2 + 15 * BLOCK];
      t3 -= region[15] * a[3 + 15 * BLOCK];
#endif
      region2[0] = t0;
      region2[1] = t1;
      region2[2] = t2;
      region2[3] = t3;
      region2+=4;
      a+=4;
    }
  } else {
#endif
    for (k = 0; k < n; ++k) {
      longWork t00 = region2[k];
      for (j = 0; j < BLOCK; j ++) {
        t00 -= region[j] * a[k + j * BLOCK];
      }
      region2[k] = t00;
    }
#if BLOCK==16
  }
#endif
}
// Backward part of solve 1
void 
ClpCholeskyDense::solveB1LongWork(longDouble * a,int n,longWork * region)
{
  int j, k;
  longWork t00;
  for (j = n-1; j >=0; j --) {
    t00 = region[j];
    for (k = j+1; k < n; ++k) {
      t00 -= region[k] * a[k + j * BLOCK];
    }
    //t00*=a[j + j * BLOCK];
    region[j] = t00;
  }
}
// Backward part of solve 2
void 
ClpCholeskyDense::solveB2LongWork(longDouble * a,int n,longWork * region, longWork * region2)
{
  int j, k;
#if BLOCK==16
  if (n==BLOCK) {
    for (j = 0; j < BLOCK; j +=4) {
      longWork t0 = region[0];
      longWork t1 = region[1];
      longWork t2 = region[2];
      longWork t3 = region[3];
      t0 -= region2[0] * a[0 + 0*BLOCK];
      t1 -= region2[0] * a[0 + 1*BLOCK];
      t2 -= region2[0] * a[0 + 2*BLOCK];
      t3 -= region2[0] * a[0 + 3*BLOCK];

      t0 -= region2[1] * a[1 + 0*BLOCK];
      t1 -= region2[1] * a[1 + 1*BLOCK];
      t2 -= region2[1] * a[1 + 2*BLOCK];
      t3 -= region2[1] * a[1 + 3*BLOCK];

      t0 -= region2[2] * a[2 + 0*BLOCK];
      t1 -= region2[2] * a[2 + 1*BLOCK];
      t2 -= region2[2] * a[2 + 2*BLOCK];
      t3 -= region2[2] * a[2 + 3*BLOCK];

      t0 -= region2[3] * a[3 + 0*BLOCK];
      t1 -= region2[3] * a[3 + 1*BLOCK];
      t2 -= region2[3] * a[3 + 2*BLOCK];
      t3 -= region2[3] * a[3 + 3*BLOCK];

      t0 -= region2[4] * a[4 + 0*BLOCK];
      t1 -= region2[4] * a[4 + 1*BLOCK];
      t2 -= region2[4] * a[4 + 2*BLOCK];
      t3 -= region2[4] * a[4 + 3*BLOCK];

      t0 -= region2[5] * a[5 + 0*BLOCK];
      t1 -= region2[5] * a[5 + 1*BLOCK];
      t2 -= region2[5] * a[5 + 2*BLOCK];
      t3 -= region2[5] * a[5 + 3*BLOCK];

      t0 -= region2[6] * a[6 + 0*BLOCK];
      t1 -= region2[6] * a[6 + 1*BLOCK];
      t2 -= region2[6] * a[6 + 2*BLOCK];
      t3 -= region2[6] * a[6 + 3*BLOCK];

      t0 -= region2[7] * a[7 + 0*BLOCK];
      t1 -= region2[7] * a[7 + 1*BLOCK];
      t2 -= region2[7] * a[7 + 2*BLOCK];
      t3 -= region2[7] * a[7 + 3*BLOCK];
#if BLOCK>8

      t0 -= region2[8] * a[8 + 0*BLOCK];
      t1 -= region2[8] * a[8 + 1*BLOCK];
      t2 -= region2[8] * a[8 + 2*BLOCK];
      t3 -= region2[8] * a[8 + 3*BLOCK];

      t0 -= region2[9] * a[9 + 0*BLOCK];
      t1 -= region2[9] * a[9 + 1*BLOCK];
      t2 -= region2[9] * a[9 + 2*BLOCK];
      t3 -= region2[9] * a[9 + 3*BLOCK];

      t0 -= region2[10] * a[10 + 0*BLOCK];
      t1 -= region2[10] * a[10 + 1*BLOCK];
      t2 -= region2[10] * a[10 + 2*BLOCK];
      t3 -= region2[10] * a[10 + 3*BLOCK];

      t0 -= region2[11] * a[11 + 0*BLOCK];
      t1 -= region2[11] * a[11 + 1*BLOCK];
      t2 -= region2[11] * a[11 + 2*BLOCK];
      t3 -= region2[11] * a[11 + 3*BLOCK];

      t0 -= region2[12] * a[12 + 0*BLOCK];
      t1 -= region2[12] * a[12 + 1*BLOCK];
      t2 -= region2[12] * a[12 + 2*BLOCK];
      t3 -= region2[12] * a[12 + 3*BLOCK];

      t0 -= region2[13] * a[13 + 0*BLOCK];
      t1 -= region2[13] * a[13 + 1*BLOCK];
      t2 -= region2[13] * a[13 + 2*BLOCK];
      t3 -= region2[13] * a[13 + 3*BLOCK];

      t0 -= region2[14] * a[14 + 0*BLOCK];
      t1 -= region2[14] * a[14 + 1*BLOCK];
      t2 -= region2[14] * a[14 + 2*BLOCK];
      t3 -= region2[14] * a[14 + 3*BLOCK];

      t0 -= region2[15] * a[15 + 0*BLOCK];
      t1 -= region2[15] * a[15 + 1*BLOCK];
      t2 -= region2[15] * a[15 + 2*BLOCK];
      t3 -= region2[15] * a[15 + 3*BLOCK];
#endif
      region[0] = t0;
      region[1] = t1;
      region[2] = t2;
      region[3] = t3;
      a+=4*BLOCK;
      region+=4;
    }
  } else {
#endif
    for (j = 0; j < BLOCK; j ++) {
      longWork t00 = region[j];
      for (k = 0; k < n; ++k) {
        t00 -= region2[k] * a[k + j * BLOCK];
      }
      region[j] = t00;
    }
#if BLOCK==16
  }
#endif
}


syntax highlighted by Code2HTML, v. 0.9.1