/*  testGridMPI.c  */

#include "../spoolesMPI.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
void mkNDlinsys ( int n1, int n2, int n3, int maxzeros, int maxsize,
   int type, int symmetryflag, int nrhs, int seed, int msglvl,
   FILE *msgFile, ETree **pfrontETree, IVL **psymbfacIVL,
   InpMtx **pmtxA, DenseMtx **pmtxX, DenseMtx **pmtxB ) ;
/*--------------------------------------------------------------------*/
int
main ( int argc, char *argv[] )
/*
*/
{
Chv             *rootchv ;
ChvManager      *chvmanager ;
DenseMtx        *mtxB, *mtxkeep, *mtxX, *mtxZ ;
double          cputotal, cutoff, droptol, error, factorops, 
                solvecpu, solveops, tau, terror, t1, t2 ;
double          cpus[14], tcpus[14] ;
DV              *cumopsDV ;
ETree           *frontETree ;
FILE            *msgFile ;
FrontMtx        *frontmtx ;
InpMtx          *mtxA, *mtxAkeep ;
int             factorerror, lookahead, maptype, maxsize, maxzeros, 
                msglvl, myid, neqns, nproc, nrhs, nrow, nzf, n1, n2, 
                n3, pivotingflag, seed, sparsityflag, symmetryflag, 
                tag, type ;
int             *rowindX, *rowindZ ;
int             stats[17], tstats[17] ;
IV              *frontOwnersIV, *vtxmapIV ;
IVL             *symbfacIVL ;
SolveMap        *solvemap ;
SubMtxManager   *factormanager, *solvemanager ;
/*
   ---------------------------------------------------------------
   find out the identity of this process and the number of process
   ---------------------------------------------------------------
*/
MPI_Init(&argc, &argv) ;
MPI_Comm_rank(MPI_COMM_WORLD, &myid) ;
MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;
fprintf(stdout, "\n process %d of %d, argc = %d", myid, nproc, argc) ;
fflush(stdout) ;
if ( argc != 19 ) {
   fprintf(stdout, 
      "\n\n usage : %s "
      "\n         msglvl msgFile n1 n2 n3 maxzeros maxsize seed "
      "\n         type symmetryflag sparsityflag pivotingflag "
      "\n         tau droptol lookahead nrhs maptype cutoff"
      "\n    msglvl       -- message level"
      "\n    msgFile      -- message file"
      "\n    n1           -- # of points in first dimension"
      "\n    n2           -- # of points in second dimension"
      "\n    n3           -- # of points in third dimension"
      "\n    maxzeros     -- max # of zeros in a front"
      "\n    maxsize      -- max # of internal vertices in a front"
      "\n    seed         -- random number seed"
      "\n    type         -- type of entries"
      "\n       1 --> real "
      "\n       2 --> complex "
      "\n    symmetryflag -- symmetry flag"
      "\n       0 --> symmetric "
      "\n       1 --> hermitian "
      "\n       2 --> nonsymmetric "
      "\n    sparsityflag -- sparsity flag"
      "\n       0 --> store dense fronts"
      "\n       1 --> store sparse fronts, use droptol to drop entries"
      "\n    pivotingflag -- pivoting flag"
      "\n       0 --> do not pivot"
      "\n       1 --> enable pivoting"
      "\n    tau     -- upper bound on factor entries"
      "\n               used only with pivoting"
      "\n    droptol -- lower bound on factor entries"
      "\n               used only with sparse fronts"
      "\n    lookahead -- parameter to schedule computation"
      "\n       0 --> no lookahead"
      "\n       nonzero --> look up the tree this many steps"
      "\n    nrhs -- number of right hand sides"
      "\n    maptype -- type of factorization map"
      "\n       1 --> wrap map"
      "\n       2 --> balanced map via post-order traversal"
      "\n       3 --> subtree-subset map"
      "\n       4 --> domain decomposition map"
      "\n    cutoff -- used when maptype = 4"
      "\n       0 < cutoff < 1 to define multisector"
      "\n       try cutoff = 1/nproc or 1/(2*nproc)"
      "\n", argv[0]) ;
   { int ii ;
   fprintf(stdout, "\n\n argc = %d", argc) ;
   for ( ii = 0 ; ii < argc ; ii++ ) {
      fprintf(stdout, "\n arg %d = %s", ii, argv[ii]) ;
   }
   }
   MPI_Finalize() ;
   return(0) ;
}
msglvl = atoi(argv[1]) ;
if ( strcmp(argv[2], "stdout") == 0 ) {
   msgFile = stdout ;
} else {
   int    length = strlen(argv[2]) + 1 + 4 ;
   char   *buffer = CVinit(length, '\0') ;
   sprintf(buffer, "%s.%d", argv[2], myid) ;
   if ( (msgFile = fopen(buffer, "w")) == NULL ) {
      fprintf(stderr, "\n fatal error in %s"
              "\n unable to open file %s\n",
              argv[0], buffer) ;
      return(-1) ;
   }
   CVfree(buffer) ;
}
n1           = atoi(argv[ 3]) ;
n2           = atoi(argv[ 4]) ;
n3           = atoi(argv[ 5]) ;
maxzeros     = atoi(argv[ 6]) ;
maxsize      = atoi(argv[ 7]) ;
seed         = atoi(argv[ 8]) ;
type         = atoi(argv[ 9]) ;
symmetryflag = atoi(argv[10]) ;
sparsityflag = atoi(argv[11]) ;
pivotingflag = atoi(argv[12]) ;
tau          = atof(argv[13]) ;
droptol      = atof(argv[14]) ;
lookahead    = atoi(argv[15]) ;
nrhs         = atoi(argv[16]) ;
maptype      = atoi(argv[17]) ;
cutoff       = atof(argv[18]) ;
fprintf(msgFile, "\n %s :"
        "\n   msglvl       = %d"
        "\n   msgFile      = %s"
        "\n   n1           = %d"
        "\n   n2           = %d"
        "\n   n3           = %d"
        "\n   maxzeros     = %d"
        "\n   maxsize      = %d"
        "\n   seed         = %d"
        "\n   type         = %d"
        "\n   symmetryflag = %d"
        "\n   sparsityflag = %d"
        "\n   pivotingflag = %d"
        "\n   tau          = %f"
        "\n   droptol      = %f"
        "\n   lookahead    = %d"
        "\n   nrhs         = %d"
        "\n   maptype      = %d"
        "\n   cutoff       = %f"
        "\n",
        argv[0], msglvl, argv[2], n1, n2, n3, maxzeros, maxsize, seed,
        type, symmetryflag, sparsityflag, pivotingflag, tau, droptol,
        lookahead, nrhs, maptype, cutoff) ;
fflush(msgFile) ;
neqns = n1*n2*n3 ;
/*
   ---------------------------------------------------------------
   find out the identity of this process and the number of process
   ---------------------------------------------------------------
*/
MPI_Comm_rank(MPI_COMM_WORLD, &myid) ;
MPI_Comm_size(MPI_COMM_WORLD, &nproc) ;
if ( myid == 0 ) {
/*
   --------------------------
   generate the linear system
   --------------------------
*/
   mkNDlinsys(n1, n2, n3, maxzeros, maxsize, type, symmetryflag, nrhs,
              seed, msglvl, msgFile, &frontETree, &symbfacIVL, &mtxA,
              &mtxX, &mtxB) ;
/*
   -------------------------------
   free the symbolic factorization
   -------------------------------
*/
   IVL_free(symbfacIVL) ;
/*
   -------------------------------------------
   send the front tree to the other processors
   -------------------------------------------
*/
   frontETree = ETree_MPI_Bcast(frontETree, 0, 
                                msglvl, msgFile, MPI_COMM_WORLD) ;
} else {
/*
   ---------------------------------------
   receive the front tree from processor 0
   ---------------------------------------
*/
   frontETree = ETree_MPI_Bcast(NULL, 0, 
                                msglvl, msgFile, MPI_COMM_WORLD) ;
/*
   ------------------------------
   create the objects for X and B
   ------------------------------
*/
   mtxX = DenseMtx_new() ;
   mtxB = DenseMtx_new() ;
   DenseMtx_init(mtxX, type, -1, -1, 0, nrhs, 1, 0) ;
   DenseMtx_init(mtxB, type, -1, -1, 0, nrhs, 1, 0) ;
/*
   -----------------------
   create the object for A
   -----------------------
*/
   mtxA = InpMtx_new() ;
   InpMtx_init(mtxA, INPMTX_BY_CHEVRONS, type, 0, 0) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n front ETree object") ;
   ETree_writeForHumanEye(frontETree, msgFile) ;
   fprintf(msgFile, "\n\n mtxX") ;
   DenseMtx_writeForHumanEye(mtxX, msgFile) ;
   fprintf(msgFile, "\n\n mtxB") ;
   DenseMtx_writeForHumanEye(mtxB, msgFile) ;
   fprintf(msgFile, "\n\n mtxA") ;
   InpMtx_writeForHumanEye(mtxA, msgFile) ;
}
/*
   ---------------------------------
   generate the owners map IV object
   ---------------------------------
*/
cumopsDV = DV_new() ;
DV_init(cumopsDV, nproc, NULL) ;
DV_fill(cumopsDV, 0.0) ;
switch ( maptype ) {
case 1 :
   frontOwnersIV = ETree_wrapMap(frontETree, type, symmetryflag,
                                 cumopsDV) ;
   break ;
case 2 :
   frontOwnersIV = ETree_balancedMap(frontETree, type, symmetryflag,
                                     cumopsDV) ;
   break ;
case 3 :
   frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type, 
                                          symmetryflag, cumopsDV) ;
   break ;
case 4 :
   frontOwnersIV = ETree_ddMap(frontETree, type, symmetryflag,
                               cumopsDV, cutoff) ;
   break ;
default :
   fprintf(stderr, "\n fatal error, maptype = %d", maptype) ;
   MPI_Finalize() ;
   exit(-1) ;
   break ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n owners map object") ;
   IV_writeForHumanEye(frontOwnersIV, msgFile) ;
}
fprintf(msgFile, "\n\n upper bound on load balance = %4.2f\n",
        DV_sum(cumopsDV)/DV_max(cumopsDV)) ;
DV_free(cumopsDV) ;
/*
   ---------------------
   create the vertex map
   ---------------------
*/
vtxmapIV = IV_new() ;
IV_init(vtxmapIV, neqns, NULL) ;
IVgather(neqns, IV_entries(vtxmapIV), IV_entries(frontOwnersIV), 
         ETree_vtxToFront(frontETree)) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n vertex map object") ;
   IV_writeForHumanEye(vtxmapIV, msgFile) ;
}
/*
   ------------------
   split the X matrix
   ------------------
*/
tag = 1 ;
stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
MARKTIME(t1) ;
mtxkeep = DenseMtx_MPI_splitByRows(mtxX, vtxmapIV, stats, msglvl,
                                   msgFile, tag, MPI_COMM_WORLD) ;
DenseMtx_free(mtxX) ;
mtxX = mtxkeep ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : split mtxX", t2 - t1) ;
fprintf(msgFile, 
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
           tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
   fflush(msgFile) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n mtxX object") ;
   DenseMtx_writeForHumanEye(mtxX, msgFile) ;
}
/*
   ------------------
   split the B matrix
   ------------------
*/
tag = 1 ;
stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
MARKTIME(t1) ;
mtxkeep = DenseMtx_MPI_splitByRows(mtxB, vtxmapIV, stats, msglvl,
                                   msgFile, tag, MPI_COMM_WORLD) ;
DenseMtx_free(mtxB) ;
mtxB = mtxkeep ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : split mtxB", t2 - t1) ;
fprintf(msgFile, 
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
           tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
   fflush(msgFile) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n mtxB object") ;
   DenseMtx_writeForHumanEye(mtxB, msgFile) ;
}
/*
   ------------------
   split the A matrix
   ------------------
*/
tag = 1 ;
stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
MARKTIME(t1) ;
InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
mtxAkeep = InpMtx_MPI_split(mtxA, vtxmapIV, stats, msglvl,
                            msgFile, tag, MPI_COMM_WORLD) ;
InpMtx_free(mtxA) ;
mtxA = mtxAkeep ;
InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : split mtxA", t2 - t1) ;
fprintf(msgFile, 
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
           tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
   fflush(msgFile) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n mtxA object") ;
   InpMtx_writeForHumanEye(mtxA, msgFile) ;
}
/*
   ----------------------------------
   compute the symbolic factorization
   ----------------------------------
*/
stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
MARKTIME(t1) ;
symbfacIVL = SymbFac_MPI_initFromInpMtx(frontETree, frontOwnersIV, mtxA,
                          stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : symbolic factorization", t2 - t1) ;
fprintf(msgFile, 
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
           tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
   fflush(msgFile) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n symbolic factorization object") ;
   IVL_writeForHumanEye(symbfacIVL, msgFile) ;
}
/*
   ----------------------------------
   initialize the front matrix object
   ----------------------------------
*/
factormanager = SubMtxManager_new() ;
SubMtxManager_init(factormanager, 0, 0) ;
MARKTIME(t1) ;
frontmtx = FrontMtx_new() ;
FrontMtx_init(frontmtx, frontETree, symbfacIVL, type, symmetryflag, 
              sparsityflag, pivotingflag, NO_LOCK, myid, frontOwnersIV, 
              factormanager, msglvl, msgFile) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
        t2 - t1) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n front matrix initialized") ;
   FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   -----------------------
   factor the front matrix
   -----------------------
*/
nzf       = ETree_nFactorEntries(frontETree, symmetryflag) ;
factorops = ETree_nFactorOps(frontETree, type, symmetryflag) ;
IVzero(17, stats) ;
DVzero(14, cpus) ;
chvmanager = ChvManager_new() ;
ChvManager_init(chvmanager, NO_LOCK, 0) ;
MARKTIME(t1) ;
rootchv = FrontMtx_MPI_factorInpMtx(frontmtx, mtxA, tau, droptol,
                    chvmanager, frontOwnersIV, lookahead, &factorerror,
                    cpus, stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : factor matrix, %8.3f mflops",
        t2 - t1, 1.e-6*factorops/(t2-t1)) ;
if ( factorerror >= 0 ) {
   fprintf(msgFile, "\n\n error found in front %d", factorerror) ;
   exit(-1) ;
}
if ( rootchv != NULL ) {
   Chv   *chv ;
   fprintf(msgFile, "\n\n factorization did not complete") ;
   for ( chv = rootchv ; chv != NULL ; chv = chv->next ) {
      fprintf(stdout, "\n chv %d, nD = %d, nL = %d, nU = %d",
              chv->id, chv->nD, chv->nL, chv->nU) ;
      Chv_writeForHumanEye(chv, msgFile) ;
   }
}
DVzero(14, tcpus) ;
MPI_Reduce((void *) cpus, (void *) tcpus, 14, MPI_DOUBLE,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
cputotal = DVsum(14, tcpus) ;
if ( cputotal > 0.0 ) {
   fprintf(msgFile,
   "\n    initialize fronts         %8.3f %6.2f"
   "\n    load fronts               %8.3f %6.2f"
   "\n    update fronts             %8.3f %6.2f"
   "\n    aggregate insert          %8.3f %6.2f"
   "\n    aggregate remove/add      %8.3f %6.2f"
   "\n    assemble postponed data   %8.3f %6.2f"
   "\n    factor fronts             %8.3f %6.2f"
   "\n    extract postponed data    %8.3f %6.2f"
   "\n    store factor entries      %8.3f %6.2f"
   "\n    post initial receives     %8.3f %6.2f"
   "\n    check for recv'd messages %8.3f %6.2f"
   "\n    post initial sends        %8.3f %6.2f"
   "\n    check for sent messages   %8.3f %6.2f"
   "\n    total time                %8.3f",
   tcpus[0], 100.*tcpus[0]/cputotal,
   tcpus[1], 100.*tcpus[1]/cputotal,
   tcpus[2], 100.*tcpus[2]/cputotal,
   tcpus[3], 100.*tcpus[3]/cputotal,
   tcpus[4], 100.*tcpus[4]/cputotal,
   tcpus[5], 100.*tcpus[5]/cputotal,
   tcpus[6], 100.*tcpus[6]/cputotal,
   tcpus[7], 100.*tcpus[7]/cputotal,
   tcpus[8], 100.*tcpus[8]/cputotal,
   tcpus[9], 100.*tcpus[9]/cputotal, 
   tcpus[10], 100.*tcpus[10]/cputotal, 
   tcpus[11], 100.*tcpus[11]/cputotal, 
   tcpus[12], 100.*tcpus[12]/cputotal, cputotal) ;
}
fprintf(msgFile, 
     "\n\n Local statistics"
     "\n    %d pivots, %d pivot tests, %d delayed rows and columns"
     "\n    %d entries in D, %d entries in L, %d entries in U"
     "\n    %d active Chv objects, %d active bytes, %d requested bytes",
     stats[0], stats[1], stats[2], stats[3], stats[4], stats[5],
     stats[14], stats[15], stats[16]) ;
fprintf(msgFile, 
        "\n    local comm  : # sent   bytes sent   # recv   bytes recv"
        "\n    aggregate     %6d %12d   %6d %12d"
        "\n    postponed     %6d %12d   %6d %12d",
        stats[6],  stats[7],  stats[8],  stats[9], 
        stats[10], stats[11], stats[12], stats[13]) ;
IVzero(11, tstats) ;
MPI_Reduce((void *) stats, (void *) tstats, 17, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, 
     "\n\n Global statistics"
     "\n    %d pivots, %d pivot tests, %d delayed rows and columns"
     "\n    %d entries in D, %d entries in L, %d entries in U"
     "\n    %d active Chv objects, %d active bytes, %d requested bytes",
        tstats[0], tstats[1], tstats[2], 
        tstats[3], tstats[4], tstats[5],
        tstats[14], tstats[15], tstats[16]) ;
   fprintf(msgFile, 
        "\n    global comm  : # sent   bytes sent   # recv   bytes recv"
        "\n    aggregate      %6d %12d   %6d %12d"
        "\n    postponed      %6d %12d   %6d %12d",
        tstats[6],  tstats[7],  tstats[8],  tstats[9], 
        tstats[10], tstats[11], tstats[12], tstats[13]) ;
   if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
      solveops = 2*(tstats[3] + tstats[4] + tstats[5]) ;
   } else {
      solveops = 2*(tstats[3] + 2*tstats[5]) ;
   }
   if ( FRONTMTX_IS_COMPLEX(frontmtx) ) {
      solveops *= 4 ;
   }
   solveops *= nrhs ;
   fprintf(msgFile, "\n\n solve operations = %.0f", solveops) ;
}
SubMtxManager_writeForHumanEye(factormanager, msgFile) ;
ChvManager_writeForHumanEye(chvmanager, msgFile) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n front factor matrix") ;
   FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   ----------------------------------------------------------
   redistribute the right hand side and solution if necessary
   ----------------------------------------------------------
*/
MARKTIME(t1) ;
if ( FRONTMTX_IS_PIVOTING(frontmtx) ) { 
   IV   *colmapIV ;

   colmapIV = FrontMtx_MPI_colmapIV(frontmtx, frontOwnersIV,
                                    msglvl, msgFile, MPI_COMM_WORLD) ;
   if ( msglvl > 2 ) {
      fprintf(msgFile, "\n\n column map IV object") ;
      IV_writeForHumanEye(colmapIV, msgFile) ;
      fflush(msgFile) ;
   }
   stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
   tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
   MARKTIME(t1) ;
   mtxkeep = DenseMtx_MPI_splitByRows(mtxX, colmapIV, stats, msglvl,
                                      msgFile, tag, MPI_COMM_WORLD) ;
   DenseMtx_free(mtxX) ;
   mtxX = mtxkeep ;
   MARKTIME(t2) ;
   fprintf(msgFile, 
           "\n\n CPU %8.3f : solution matrix split ", t2 - t1) ;
   fprintf(msgFile, 
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
   fflush(msgFile) ;
   MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
             MPI_SUM, 0, MPI_COMM_WORLD) ;
   if ( myid == 0 ) {
      fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
              tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
      fflush(msgFile) ;
   }
   stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
   tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
   if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
      IV   *rowmapIV ;

      rowmapIV = FrontMtx_MPI_rowmapIV(frontmtx, frontOwnersIV, msglvl, 
                                       msgFile, MPI_COMM_WORLD) ;
      if ( msglvl > 2 ) {
         fprintf(msgFile, "\n\n row map IV object") ;
         IV_writeForHumanEye(rowmapIV, msgFile) ;
         fflush(msgFile) ;
      }
      MARKTIME(t1) ;
      mtxkeep = DenseMtx_MPI_splitByRows(mtxB, rowmapIV, stats, msglvl,
                                      msgFile, tag, MPI_COMM_WORLD) ;
      DenseMtx_free(mtxB) ;
      mtxB = mtxkeep ;
      MARKTIME(t2) ;
      fprintf(msgFile, "\n CPU %8.3f : rhs matrix split ", t2 - t1) ;
      IV_free(rowmapIV) ;
   } else {
      IVfill(4, stats, 0) ;
      MARKTIME(t1) ;
      mtxkeep = DenseMtx_MPI_splitByRows(mtxB, colmapIV, stats, msglvl,
                                      msgFile, tag, MPI_COMM_WORLD) ;
      DenseMtx_free(mtxB) ;
      mtxB = mtxkeep ;
      MARKTIME(t2) ;
      fprintf(msgFile, "\n CPU %8.3f : rhs matrix split ", t2 - t1) ;
   }
   fprintf(msgFile, 
        "\n\n Redistibute rhs and solution for this process"
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
   fflush(msgFile) ;
   MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
             MPI_SUM, 0, MPI_COMM_WORLD) ;
   if ( myid == 0 ) {
      fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
              tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
      fflush(msgFile) ;
   }
   if ( msglvl > 2 ) {
      fprintf(msgFile, "\n\n solution matrix") ;
      DenseMtx_writeForHumanEye(mtxX, msgFile) ;
      fprintf(msgFile, "\n\n right hand side matrix") ;
      DenseMtx_writeForHumanEye(mtxB, msgFile) ;
      fflush(msgFile) ;
   }
   IV_free(colmapIV) ;
}
MARKTIME(t2) ;
fprintf(msgFile, "\n\n CPU %8.3f : solution and rhs set up ", t2-t1);
/*
   -----------------------------------------------------------------
   post-process the matrix and convert to a submatrix representation
   -----------------------------------------------------------------
*/
stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
MARKTIME(t1) ;
FrontMtx_MPI_postProcess(frontmtx, frontOwnersIV, 
                         stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : post-process the factor matrix",
        t2 - t1) ;
fprintf(msgFile, 
        "\n Post-process communication for this process"
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
           tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
   fflush(msgFile) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n after post-processing, front factor matrix") ;
   FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   ---------------------------
   create the solve map object
   ---------------------------
*/
MARKTIME(t1) ;
solvemap = SolveMap_new() ;
if (  FRONTMTX_IS_NONSYMMETRIC(frontmtx)
   && FRONTMTX_IS_PIVOTING(frontmtx) ) {
   SolveMap_ddMap(solvemap, SPOOLES_NONSYMMETRIC, 
                  frontmtx->upperblockIVL, frontmtx->lowerblockIVL, 
                  nproc, frontOwnersIV, frontmtx->tree, 
                  seed, msglvl, msgFile);
} else {
   SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC, frontmtx->upperblockIVL, 
                  NULL, nproc, frontOwnersIV, frontmtx->tree, 
                  seed, msglvl, msgFile) ;
}
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : solve map created", t2 - t1) ;
if ( msglvl > 2 ) {
   SolveMap_writeForHumanEye(solvemap, msgFile) ;
} else {
   SolveMap_writeStats(solvemap, msgFile) ;
}
fflush(msgFile) ;
/*
   --------------------------------------------------
   split the front matrix to conform to the solve map
   --------------------------------------------------
*/
stats[0]  = stats[1]  = stats[2]  = stats[3]  = 0 ;
tstats[0] = tstats[1] = tstats[2] = tstats[3] = 0 ;
MARKTIME(t1) ;
FrontMtx_MPI_split(frontmtx, solvemap, 
                   stats, msglvl, msgFile, tag, MPI_COMM_WORLD) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n\n CPU %8.3f : front matrix split", t2 - t1) ;
fprintf(msgFile, 
        "\n Split front matrix communication for this process"
        "\n                 # sent   bytes sent   # recv   bytes recv"
        "\n local comm  : %6d %12d   %6d %12d",
        stats[0],  stats[2],  stats[1],  stats[3]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 4, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global comm : %6d %12d   %6d %12d\n",
           tstats[0],  tstats[2],  tstats[1],  tstats[3]) ;
   fflush(msgFile) ;
}
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n\n after splitting, front factor matrix") ;
   FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
   fflush(msgFile) ;
}
/*
   -----------------------
   solve the linear system
   -----------------------
*/
solvemanager = SubMtxManager_new() ;
SubMtxManager_init(solvemanager, 0, 0) ;
IVzero(8,  stats) ;
IVzero(8,  tstats) ;
DVzero(5, cpus) ;
mtxZ = DenseMtx_new() ;
DenseMtx_init(mtxZ, type, 0, 0, mtxX->nrow, mtxX->ncol, 1, mtxX->nrow) ;
DenseMtx_zero(mtxZ) ;
DenseMtx_rowIndices(mtxX, &nrow, &rowindX) ;
DenseMtx_rowIndices(mtxZ, &nrow, &rowindZ) ;
IVcopy(nrow, rowindZ, rowindX) ;
MARKTIME(t1) ;
FrontMtx_MPI_solve(frontmtx, mtxZ, mtxB, solvemanager, solvemap, 
                   cpus, stats, msglvl, msgFile, tag, MPI_COMM_WORLD);
MARKTIME(t2) ;
solvecpu = t2 - t1 ;
fprintf(msgFile, "\n CPU %8.3f : solve done ", solvecpu) ;
/*
fprintf(msgFile, "\n\n cpus") ;
DVfprintf(msgFile, 6, cpus) ;
*/
DVzero(6, tcpus) ;
MPI_Reduce((void *) cpus, (void *) tcpus, 6, MPI_DOUBLE,
           MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, ", %.3f mflops", 1.e-6*solveops/(t2-t1)) ;
/*
   fprintf(msgFile, "\n\n tcpus") ;
   DVfprintf(msgFile, 6, tcpus) ;
*/
   solvecpu = DVsum(6, tcpus) ;
   fprintf(msgFile, "\n\n Solve CPU breakdown:") ;
   if ( solvecpu > 0.0 ) {
      fprintf(msgFile,
      "\n    set up solves               %8.3f %6.2f"
      "\n    load rhs and store solution %8.3f %6.2f"
      "\n    forward solve               %8.3f %6.2f"
      "\n    diagonal solve              %8.3f %6.2f"
      "\n    backward solve              %8.3f %6.2f"
      "\n    miscellaneous               %8.3f %6.2f"
      "\n    total time                  %8.3f",
      tcpus[0], 100.*tcpus[0]/solvecpu,
      tcpus[1], 100.*tcpus[1]/solvecpu,
      tcpus[2], 100.*tcpus[2]/solvecpu,
      tcpus[3], 100.*tcpus[3]/solvecpu,
      tcpus[4], 100.*tcpus[4]/solvecpu, 
      tcpus[5], 100.*tcpus[5]/solvecpu, solvecpu) ;
   }
}
fprintf(msgFile, 
        "\n\n Solve communication for this processor"
        "\n                   aggregates              solution"
        "\n                   #    # bytes          #     #bytes"
        "\n   send   %10d %10d %10d %10d"
        "\n   recv   %10d %10d %10d %10d",
        stats[1], stats[3], stats[0], stats[2],
        stats[5], stats[7], stats[4], stats[6]) ;
fflush(msgFile) ;
MPI_Reduce((void *) stats, (void *) tstats, 8, MPI_INT,
          MPI_SUM, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, 
           "\n\n Solve communication for all processors"
           "\n                   aggregates              solution"
           "\n                   #    # bytes          #     #bytes"
           "\n   send   %10d %10d %10d %10d"
           "\n   recv   %10d %10d %10d %10d",
           tstats[1], tstats[3], tstats[0], tstats[2],
           tstats[5], tstats[7], tstats[4], tstats[6]) ;
   fflush(msgFile) ;
}
SubMtxManager_writeForHumanEye(solvemanager, msgFile) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n computed solution") ;
   DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
   fflush(msgFile) ;
}
DenseMtx_sub(mtxZ, mtxX) ;
error  = DenseMtx_maxabs(mtxZ) ;
fprintf(msgFile, "\n\n local error  = %12.4e", error) ;
MPI_Reduce((void *) &error, (void *) &terror, 1, MPI_DOUBLE,
           MPI_MAX, 0, MPI_COMM_WORLD) ;
if ( myid == 0 ) {
   fprintf(msgFile, "\n global error = %12.4e", terror) ;
   fflush(msgFile) ;
}
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n error matrix") ;
   DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
   fflush(msgFile) ;
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
DenseMtx_free(mtxX) ;
DenseMtx_free(mtxB) ;
DenseMtx_free(mtxZ) ;
InpMtx_free(mtxA) ;
SubMtxManager_free(factormanager) ;
SubMtxManager_free(solvemanager) ;
ChvManager_free(chvmanager) ;
FrontMtx_free(frontmtx) ;
ETree_free(frontETree) ;
IVL_free(symbfacIVL) ;
IV_free(vtxmapIV) ;
IV_free(frontOwnersIV) ;
SolveMap_free(solvemap) ;

MPI_Barrier(MPI_COMM_WORLD) ;

fclose(msgFile) ;

MPI_Finalize() ;

return(1) ; }

/*--------------------------------------------------------------------*/


syntax highlighted by Code2HTML, v. 0.9.1