/*  QRutil.c  */

#include "../FrontMtx.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
/*
   --------------------------------------------------------------------
   purpose -- to setup two data structures for a QR serial
              or multithreaded factorization

   rowsIVL[J]    -- list of rows of A to be assembled into front J
   firstnz[irow] -- column with location of leading nonzero of row in A

   created -- 98may29, cca
   --------------------------------------------------------------------
*/
void
FrontMtx_QR_setup (
   FrontMtx   *frontmtx,
   InpMtx     *mtxA,
   IVL        **prowsIVL,
   int        **pfirstnz,
   int        msglvl,
   FILE       *msgFile
) {
int   count, irow, jcol, J, loc, neqns, nfront, nrowA, rowsize ;
int   *firstnz, *head, *link, *list, *rowind, *vtxToFront ;
IVL   *rowsIVL ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontmtx == NULL || mtxA == NULL || prowsIVL == NULL
   || pfirstnz == NULL || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in FrontMtx_QR_setup()"
           "\n bad input\n") ;
   exit(-1) ; 
}
neqns      = FrontMtx_neqns(frontmtx) ;
nfront     = FrontMtx_nfront(frontmtx) ;
vtxToFront = ETree_vtxToFront(frontmtx->frontETree) ;
/*
   ----------------------------------------------------------------
   create the rowsIVL object,
   list(J) = list of rows that are assembled in front J
   firstnz[irowA] = first column with nonzero element in A(irowA,*)
   ----------------------------------------------------------------
*/
InpMtx_changeCoordType(mtxA, INPMTX_BY_ROWS) ;
InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
nrowA = 1 + IVmax(InpMtx_nent(mtxA), InpMtx_ivec1(mtxA), &loc) ;
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n nrowA = %d ", nrowA) ;
   fflush(msgFile) ;
}
firstnz = IVinit(nrowA, -1) ;
head    = IVinit(nfront, -1) ;
link    = IVinit(nrowA, -1) ;
for ( irow = nrowA - 1 ; irow >= 0 ; irow-- ) {
   InpMtx_vector(mtxA, irow, &rowsize, &rowind) ;
   if ( rowsize > 0 ) {
      firstnz[irow] = jcol = rowind[0] ;
      J             = vtxToFront[jcol] ;
      link[irow]    = head[J]          ;
      head[J]       = irow             ;
   }
}
rowsIVL = IVL_new() ;
IVL_init2(rowsIVL, IVL_CHUNKED, nfront, nrowA) ;
list = IVinit(neqns, -1) ;
for ( J = 0 ; J < nfront ; J++ ) {
   count = 0 ;
   for ( irow = head[J] ; irow != -1 ; irow = link[irow] ) {
      list[count++] = irow ;
   }
   if ( count > 0 ) {
      IVL_setList(rowsIVL, J, count, list) ;
   }
}
IVfree(head) ;
IVfree(link) ;
IVfree(list) ;
/*
   ---------------------------
   set the pointers for return
   ---------------------------
*/
*prowsIVL = rowsIVL ;
*pfirstnz = firstnz ;

return ; }

/*--------------------------------------------------------------------*/
/*
   -----------------------------------------------
   purpose --  visit front J during a serial 
               or multithreaded QR factorization

   cpus[1] -- initialize and load staircase matrix
   cpus[2] -- factor the matrix
   cpus[3] -- scale and store factor entries
   cpus[4] -- store update entries

   created -- 98may28, cca
   -----------------------------------------------
*/
void
FrontMtx_QR_factorVisit (
   FrontMtx     *frontmtx,
   int          J,
   InpMtx       *mtxA,
   IVL          *rowsIVL,
   int          firstnz[],
   ChvList      *updlist,
   ChvManager   *chvmanager,
   char         status[],
   int          colmap[],
   DV           *workDV,
   double       cpus[],
   double       *pfacops,
   int          msglvl,
   FILE         *msgFile
) {
/*
   ---------------
   check the input
   ---------------
*/
if ( frontmtx == NULL || mtxA == NULL || rowsIVL == NULL
     || firstnz == NULL || updlist == NULL || chvmanager == NULL 
     || status == NULL || colmap == NULL || workDV == NULL 
     || cpus == NULL || pfacops == NULL 
     || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(msgFile, "\n fatal error in FrontMtx_QR_factorVisit(%d)"
           "\n bad input\n", J) ;
   exit(-1) ;
}
/*
   ------------------------------------------------------------
   check to see if all incoming updates are present in the list
   ------------------------------------------------------------
*/
if ( ChvList_isCountZero(updlist, J) == 1 ) {
   A2       *frontJ ;
   Chv      *firstchild, *updchv ;
   double   ops, t1, t2 ;
   int      K ;
/*
   ----------------------------------------
   everything is ready to factor this front
   ----------------------------------------
*/
   firstchild = ChvList_getList(updlist, J) ;
/*
   ----------------------------------------
   initialize and load the staircase matrix
   ----------------------------------------
*/
   MARKTIME(t1) ;
   frontJ = FrontMtx_QR_assembleFront(frontmtx, J, mtxA, rowsIVL,
                                      firstnz, colmap, firstchild, 
                                      workDV, msglvl, msgFile) ;
   if ( firstchild != NULL ) {
      ChvManager_releaseListOfObjects(chvmanager, firstchild) ;
   }
   MARKTIME(t2) ;
   cpus[1] += t2 - t1 ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n\n after assembling front") ;
      A2_writeForHumanEye(frontJ, msgFile) ;
      fflush(msgFile) ;
   }
/*
   ---------------------------
   factor the staircase matrix
   ---------------------------
*/
   MARKTIME(t1) ;
   ops = A2_QRreduce(frontJ, workDV, msglvl, msgFile) ;
   *pfacops += ops ;
   MARKTIME(t2) ;
   cpus[2] += t2 - t1 ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n\n after factoring") ;
      A2_writeForHumanEye(frontJ, msgFile) ;
      fflush(msgFile) ;
   }
/*
   ----------------------------------
   scale and store the factor entries
   ----------------------------------
*/
   MARKTIME(t1) ;
   FrontMtx_QR_storeFront(frontmtx, J, frontJ, msglvl, msgFile) ;
   MARKTIME(t2) ;
   cpus[3] += t2 - t1 ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n\n after storing factor entries") ;
      A2_writeForHumanEye(frontJ, msgFile) ;
      fflush(msgFile) ;
   }
/*
   -----------------------
   store the update matrix
   -----------------------
*/
   if ( (K = frontmtx->tree->par[J]) != -1 ) {
      MARKTIME(t1) ;
      updchv = FrontMtx_QR_storeUpdate(frontmtx, J, frontJ, chvmanager,
                                       msglvl, msgFile) ;
      ChvList_addObjectToList(updlist, updchv, K) ;
      MARKTIME(t2) ;
      cpus[4] += t2 - t1 ;
      if ( msglvl > 3 ) {
         fprintf(msgFile, "\n\n after storing update entries") ;
         A2_writeForHumanEye(frontJ, msgFile) ;
         fflush(msgFile) ;
      }
   }
/*
   -------------------------
   free the staircase matrix
   -------------------------
*/
   A2_free(frontJ) ;
/*
   -------------------------------------
   set the status as finished and return
   -------------------------------------
*/
   status[J] = 'F' ;
}
return ; }

/*--------------------------------------------------------------------*/
/*
   --------------------------------------------------------------
   purpose -- create and return an A2 object that contains rows
              of A and rows from update matrices of the children.
              the matrix may not be in staircase form

   created -- 98may25, cca
   --------------------------------------------------------------
*/
A2 *
FrontMtx_QR_assembleFront (
   FrontMtx   *frontmtx,
   int        J,
   InpMtx     *mtxA,
   IVL        *rowsIVL,
   int        firstnz[],
   int        colmap[],
   Chv        *firstchild,
   DV         *workDV,
   int        msglvl,
   FILE       *msgFile
) {
A2       *frontJ ;
Chv      *chvI ;
double   *rowI, *rowJ, *rowentA ;
int      ii, irow, irowA, irowI, jcol, jj, jrow, ncolI, ncolJ, 
         nentA, nrowI, nrowJ, nrowFromA ;
int      *colindA, *colindI, *colindJ, *map, *rowids, *rowsFromA ;
/*
   ---------------
   check the input
   ---------------
*/
if ( frontmtx == NULL || mtxA == NULL || rowsIVL == NULL
   || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in FrontMtx_QR_assembleFront()"
           "\n bad input\n") ;
   exit(-1) ;
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n\n inside FrontMtx_QR_assembleFront(%d)", J) ;
   fflush(msgFile) ;
}
/*
   --------------------------------------------------
   set up the map from global to local column indices
   --------------------------------------------------
*/
FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ;
for ( jcol = 0 ; jcol < ncolJ ; jcol++ ) {
   colmap[colindJ[jcol]] = jcol ;
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n front %d's column indices", J) ;
   IVfprintf(msgFile, ncolJ, colindJ) ;
   fflush(msgFile) ;
}
/*
   -------------------------------------------------
   compute the size of the front and map the global 
   indices of the update matrices into local indices
   -------------------------------------------------
*/
IVL_listAndSize(rowsIVL, J, &nrowFromA, &rowsFromA) ;
nrowJ = nrowFromA ;
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n %d rows from A", nrowFromA) ;
   fflush(msgFile) ;
}
for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) {
   nrowJ += chvI->nD ;
   Chv_columnIndices(chvI, &ncolI, &colindI) ;
   for ( jcol = 0 ; jcol < ncolI ; jcol++ ) {
      colindI[jcol] = colmap[colindI[jcol]] ;
   }
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n %d rows from child %d", chvI->nD, chvI->id) ;
      fflush(msgFile) ;
   }
}
/*
   ----------------------------------------------------------
   get workspace for the rowids[nrowJ] and map[nrowJ] vectors
   ----------------------------------------------------------
*/
if ( sizeof(int) == sizeof(double) ) {
   DV_setSize(workDV, 2*nrowJ) ;
} else if ( 2*sizeof(int) == sizeof(double) ) {
   DV_setSize(workDV, nrowJ) ;
}
rowids = (int *) DV_entries(workDV) ;
map    = rowids + nrowJ ;
IVramp(nrowJ, rowids, 0, 1) ;
IVfill(nrowJ, map, -1) ;
/*
   -----------------------------------------------------------------
   get the map from incoming rows to their place in the front matrix
   -----------------------------------------------------------------
*/
for ( irow = jrow = 0 ; irow < nrowFromA ; irow++, jrow++ ) {
   irowA = rowsFromA[irow] ;
   map[jrow] = colmap[firstnz[irowA]] ;
}
for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) {
   nrowI = chvI->nD ;
   Chv_columnIndices(chvI, &ncolI, &colindI) ;
   for ( irow = 0 ; irow < nrowI ; irow++, jrow++ ) {
      map[jrow] = colindI[irow] ;
   }
}
IV2qsortUp(nrowJ, map, rowids) ;
for ( irow = 0 ; irow < nrowJ ; irow++ ) {
   map[rowids[irow]] = irow ;
}
/*
   ----------------------------
   allocate the A2 front object
   ----------------------------
*/
frontJ = A2_new() ;
A2_init(frontJ, frontmtx->type, nrowJ, ncolJ, ncolJ, 1, NULL) ;
A2_zero(frontJ) ;
/*
   ------------------------------------
   load the original rows of the matrix
   ------------------------------------
*/
for ( jrow = 0 ; jrow < nrowFromA ; jrow++ ) {
   irowA = rowsFromA[jrow] ;
   rowJ  = A2_row(frontJ, map[jrow]) ;
   if ( A2_IS_REAL(frontJ) ) {
      InpMtx_realVector(mtxA, irowA, &nentA, &colindA, &rowentA) ;
   } else if ( A2_IS_COMPLEX(frontJ) ) {
      InpMtx_complexVector(mtxA, irowA, &nentA, &colindA, &rowentA) ;
   }
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n loading row %d", irowA) ;
      fprintf(msgFile, "\n global indices") ;
      IVfprintf(msgFile, nentA, colindA) ;
      fflush(msgFile) ;
   }
   if ( A2_IS_REAL(frontJ) ) {
      for ( ii = 0 ; ii < nentA ; ii++ ) {
         jj = colmap[colindA[ii]] ;
         rowJ[jj] = rowentA[ii] ;
      }
   } else if ( A2_IS_COMPLEX(frontJ) ) {
      for ( ii = 0 ; ii < nentA ; ii++ ) {
         jj = colmap[colindA[ii]] ;
         rowJ[2*jj]   = rowentA[2*ii]   ;
         rowJ[2*jj+1] = rowentA[2*ii+1] ;
      }
   }
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n after assembling rows of A") ;
   A2_writeForHumanEye(frontJ, msgFile) ;
   fflush(msgFile) ;
}
/*
   ----------------------------------
   load the updates from the children 
   ----------------------------------
*/
for ( chvI = firstchild ; chvI != NULL ; chvI = chvI->next ) {
   nrowI = chvI->nD ;
   Chv_columnIndices(chvI, &ncolI, &colindI) ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n loading child %d", chvI->id) ;
      fprintf(msgFile, "\n child's column indices") ;
      IVfprintf(msgFile, ncolI, colindI) ;
      Chv_writeForHumanEye(chvI, msgFile) ;
      fflush(msgFile) ;
   }
   rowI = Chv_entries(chvI) ;
   for ( irowI = 0 ; irowI < nrowI ; irowI++, jrow++ ) {
      rowJ = A2_row(frontJ, map[jrow]) ;
      if ( A2_IS_REAL(frontJ) ) {
         for ( ii = irowI ; ii < ncolI ; ii++ ) {
            jj = colindI[ii] ;
            rowJ[jj] = rowI[ii] ;
         }
         rowI += ncolI - irowI - 1 ;
      } else if ( A2_IS_COMPLEX(frontJ) ) {
         for ( ii = irowI ; ii < ncolI ; ii++ ) {
            jj = colindI[ii] ;
            rowJ[2*jj]   = rowI[2*ii]   ;
            rowJ[2*jj+1] = rowI[2*ii+1] ;
         }
         rowI += 2*(ncolI - irowI - 1) ;
      }
   }
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n after assembling child %d", chvI->id) ;
      A2_writeForHumanEye(frontJ, msgFile) ;
      fflush(msgFile) ;
   }
}
return(frontJ) ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------
   store the factor entries of the reduced front matrix

   created -- 98may25, cca
   ----------------------------------------------------
*/
void
FrontMtx_QR_storeFront (
   FrontMtx   *frontmtx,
   int        J,
   A2         *frontJ,
   int        msglvl,
   FILE       *msgFile
) {
A2       tempA2 ;
double   fac, ifac, imag, real, rfac ;
double   *entDJJ, *entUJJ, *entUJN, *row ;
int      inc1, inc2, irow, jcol, ncol, ncolJ, nD, nentD, nentUJJ, 
         nfront, nrow, nU ;
int      *colind, *colindJ, *firstlocs, *sizes ;
SubMtx   *mtx ;
/*
   ---------------
   check the input
   ---------------
*/
if (  frontmtx == NULL || frontJ == NULL
   || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in FrontMtx_QR_storeFront()"
           "\n bad input\n") ;
   exit(-1) ;
}
nfront = FrontMtx_nfront(frontmtx) ;
FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ;
nrow   = A2_nrow(frontJ) ;
ncol   = A2_ncol(frontJ) ;
A2_setDefaultFields(&tempA2) ;
nD = FrontMtx_frontSize(frontmtx, J) ;
nU = ncol - nD ;
/*
   --------------------------------------
   scale the rows and square the diagonal
   --------------------------------------
*/
row = A2_entries(frontJ) ;
if ( A2_IS_REAL(frontJ) ) {
   for ( irow = 0 ; irow < nD ; irow++ ) {
      if ( row[irow] != 0.0 ) {
         fac = 1./row[irow] ;
         for ( jcol = irow + 1 ; jcol < ncol ; jcol++ ) {
            row[jcol] *= fac ;
         }
         row[irow] = row[irow] * row[irow] ;
      }
      row += ncol ;
   }
} else if ( A2_IS_COMPLEX(frontJ) ) {
   for ( irow = 0 ; irow < nD ; irow++ ) {
      real = row[2*irow] ; imag = row[2*irow+1] ;
      if (  real != 0.0 || imag != 0.0 ) {
         Zrecip(real, imag, &rfac, &ifac) ;
         ZVscale(ncol - irow - 1, & row[2*irow+2], rfac, ifac) ;
         row[2*irow]   = real*real + imag*imag ;
         row[2*irow+1] = 0.0 ;
      }
      row += 2*ncol ;
   }
}
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n after scaling rows of A") ;
   A2_writeForHumanEye(frontJ, msgFile) ;
   fflush(msgFile) ;
}
/*
   -------------------------
   copy the diagonal entries
   -------------------------
*/
mtx = FrontMtx_diagMtx(frontmtx, J) ;
SubMtx_diagonalInfo(mtx, &nentD, &entDJJ) ;
A2_subA2(&tempA2, frontJ, 0, nD-1, 0, nD-1) ;
A2_copyEntriesToVector(&tempA2, nentD, entDJJ, 
                       A2_DIAGONAL, A2_BY_ROWS) ;
SubMtx_columnIndices(mtx, &ncol, &colind) ;
IVcopy(nD, colind, colindJ) ;
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n diagonal factor matrix") ;
   SubMtx_writeForHumanEye(mtx, msgFile) ;
   fflush(msgFile) ;
}
if ( (mtx = FrontMtx_upperMtx(frontmtx, J, J)) != NULL ) {
/*
   ------------------------
   copy the U_{J,J} entries
   ------------------------
*/
   SubMtx_denseSubcolumnsInfo(mtx, &nD, &nentUJJ, 
                           &firstlocs, &sizes, &entUJJ) ;
   A2_copyEntriesToVector(&tempA2, nentUJJ, entUJJ, 
                          A2_STRICT_UPPER, A2_BY_COLUMNS) ;
   SubMtx_columnIndices(mtx, &ncol, &colind) ;
   IVcopy(nD, colind, colindJ) ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n UJJ factor matrix") ;
      SubMtx_writeForHumanEye(mtx, msgFile) ;
      fflush(msgFile) ;
   }
}
if ( ncolJ > nD ) {
/*
   -----------------------------
   copy the U_{J,bnd{J}} entries
   -----------------------------
*/
   mtx = FrontMtx_upperMtx(frontmtx, J, nfront) ;
   SubMtx_denseInfo(mtx, &nD, &nU, &inc1, &inc2, &entUJN) ;
   A2_subA2(&tempA2, frontJ, 0, nD-1, nD, ncolJ-1) ;
   A2_copyEntriesToVector(&tempA2, nD*nU, entUJN, 
                          A2_ALL_ENTRIES, A2_BY_COLUMNS) ;
   SubMtx_columnIndices(mtx, &ncol, &colind) ;
   IVcopy(nU, colind, colindJ + nD) ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n UJN factor matrix") ;
      SubMtx_writeForHumanEye(mtx, msgFile) ;
      fflush(msgFile) ;
   }
}
return ; }

/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------
   purpose -- to create and return a Chv object that
              holds the update matrix for front J

   created -- 98may25, cca
   -------------------------------------------------
*/
Chv *
FrontMtx_QR_storeUpdate (
   FrontMtx     *frontmtx,
   int          J,
   A2           *frontJ,
   ChvManager   *chvmanager,
   int          msglvl,
   FILE         *msgFile
) {
A2       tempJ ;
Chv      *chvJ ;
double   *updent ;
int      nbytes, ncolJ, ncolupd, nD, nent, nrowJ, nrowupd ;
int      *colindJ, *updind ;
/*
   -----------------------------------------------
   compute the number of rows in the update matrix
   -----------------------------------------------
*/
nD = FrontMtx_frontSize(frontmtx, J) ;
FrontMtx_columnIndices(frontmtx, J, &ncolJ, &colindJ) ;
nrowJ = A2_nrow(frontJ) ;
nrowupd = ((nrowJ >= ncolJ) ? ncolJ : nrowJ) - nD ;
ncolupd = ncolJ - nD ;
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n\n inside FrontMtx_QR_storeUpdate(%d)", J) ;
   fprintf(msgFile, "\n nD %d, nrowJ %d, nrowupd %d, ncolupd %d",
           nD, nrowJ, nrowupd, ncolupd) ;
   fflush(msgFile) ;
}
if ( nrowupd > 0 && ncolupd > 0 ) {
   if ( FRONTMTX_IS_REAL(frontmtx) ) {
      nbytes = Chv_nbytesNeeded(nrowupd, 0, ncolupd - nrowupd, 
                                SPOOLES_REAL, SPOOLES_SYMMETRIC) ;
   } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) {
      nbytes = Chv_nbytesNeeded(nrowupd, 0, ncolupd - nrowupd, 
                                SPOOLES_COMPLEX, SPOOLES_HERMITIAN) ;
   }
   chvJ = ChvManager_newObjectOfSizeNbytes(chvmanager, nbytes) ;
   if ( FRONTMTX_IS_REAL(frontmtx) ) {
       Chv_init(chvJ, J, nrowupd, 0, ncolupd - nrowupd, 
                SPOOLES_REAL, SPOOLES_SYMMETRIC) ;
   } else if ( FRONTMTX_IS_COMPLEX(frontmtx) ) {
       Chv_init(chvJ, J, nrowupd, 0, ncolupd - nrowupd, 
                SPOOLES_COMPLEX, SPOOLES_HERMITIAN) ;
   }
   Chv_columnIndices(chvJ, &ncolupd, &updind) ;
   IVcopy(ncolupd, updind, colindJ + nD) ;
   nent   = Chv_nent(chvJ) ;
   updent = Chv_entries(chvJ) ;
   A2_setDefaultFields(&tempJ) ;
   A2_subA2(&tempJ, frontJ, nD, nrowJ - 1, nD, ncolJ - 1) ;
   A2_copyEntriesToVector(&tempJ, nent, updent, A2_UPPER, A2_BY_ROWS) ;
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n update matrix %d", J) ;
      Chv_writeForHumanEye(chvJ, msgFile) ;
      fflush(msgFile) ;
   }
} else {
   chvJ = NULL ;
}
return(chvJ) ; }

/*--------------------------------------------------------------------*/


syntax highlighted by Code2HTML, v. 0.9.1