/* testQRgridMT.c */
#include "../spoolesMT.h"
#include "../../FrontMtx.h"
#include "../../Drand.h"
#include "../../timings.h"
/*--------------------------------------------------------------------*/
void mkNDlinsysQR ( int n1, int n2, int n3, int type, int nrhs,
int seed, int msglvl, FILE *msgFile, ETree **pfrontETree,
IVL **psymbfacIVL, InpMtx **pmtxA, DenseMtx **pmtxX,
DenseMtx **pmtxB) ;
/*--------------------------------------------------------------------*/
int
main ( int argc, char *argv[] )
/*
---------------------------------------------------
test the QR factor method for a FrontMtx object
on an n1 x n2 x n3 grid
(1) generate an overdetermined system AX = B
(2) factor the matrix
(3) solve the systems
created -- 98may29, cca
---------------------------------------------------
*/
{
ChvManager *chvmanager ;
DenseMtx *mtxB, *mtxX, *mtxZ ;
double cputotal, cutoff, factorops ;
double cpus[10] ;
double nops, t1, t2 ;
DV *cumopsDV ;
ETree *frontETree ;
FILE *msgFile ;
FrontMtx *frontmtx ;
InpMtx *mtxA ;
int maptype, msglvl, neqns, nrhs, nthread,
n1, n2, n3, seed, type ;
IV *frontOwnersIV ;
IVL *symbfacIVL ;
SolveMap *solvemap ;
SubMtxManager *mtxmanager ;
if ( argc != 12 ) {
fprintf(stdout,
"\n\n usage : %s msglvl msgFile n1 n2 n3 seed nrhs type "
"\n nthread maptype cutoff"
"\n msglvl -- message level"
"\n msgFile -- message file"
"\n n1 -- # of points in the first direction"
"\n n2 -- # of points in the second direction"
"\n n3 -- # of points in the third direction"
"\n seed -- random number seed"
"\n nrhs -- # of right hand sides"
"\n type -- type of linear system"
"\n 1 -- real"
"\n 2 -- complex"
"\n nthread -- number of threads"
"\n maptype -- type of map from fronts to threads"
"\n 1 --> wrap map"
"\n 2 --> balanced map via a post-order traversal"
"\n 3 --> subtree-subset map"
"\n 4 --> domain decomposition map"
"\n cutoff -- cutoff used for domain decomposition map"
"\n 0 <= cutoff <= 1 used to define the multisector"
"\n", argv[0]) ;
return(0) ;
}
msglvl = atoi(argv[1]) ;
if ( strcmp(argv[2], "stdout") == 0 ) {
msgFile = stdout ;
} else if ( (msgFile = fopen(argv[2], "a")) == NULL ) {
fprintf(stderr, "\n fatal error in %s"
"\n unable to open file %s\n",
argv[0], argv[2]) ;
return(-1) ;
}
n1 = atoi(argv[3]) ;
n2 = atoi(argv[4]) ;
n3 = atoi(argv[5]) ;
seed = atoi(argv[6]) ;
nrhs = atoi(argv[7]) ;
type = atoi(argv[8]) ;
nthread = atoi(argv[8]) ;
maptype = atoi(argv[8]) ;
cutoff = atoi(argv[8]) ;
fprintf(msgFile,
"\n %s "
"\n msglvl -- %d"
"\n msgFile -- %s"
"\n n1 -- %d"
"\n n2 -- %d"
"\n n3 -- %d"
"\n seed -- %d"
"\n nrhs -- %d"
"\n type -- %d"
"\n nthread -- %d"
"\n maptype -- %d"
"\n cutoff -- %f"
"\n",
argv[0], msglvl, argv[2], n1, n2, n3, seed, nrhs, type,
nthread, maptype, cutoff) ;
fflush(msgFile) ;
neqns = n1*n2*n3 ;
if ( type != SPOOLES_REAL && type != SPOOLES_COMPLEX ) {
fprintf(stderr, "\n fatal error, type must be real or complex") ;
exit(-1) ;
}
/*
------------------------------------------
generate the A X = B overdetermined system
------------------------------------------
*/
mkNDlinsysQR(n1, n2, n3, type, nrhs, seed, msglvl, msgFile,
&frontETree, &symbfacIVL, &mtxA, &mtxX, &mtxB) ;
fprintf(msgFile, "\n\n %d entries in A", InpMtx_nent(mtxA)) ;
/*
--------------------------------------------------
initialize the cumulative operations metric object
--------------------------------------------------
*/
cumopsDV = DV_new() ;
DV_init(cumopsDV, nthread, NULL) ;
DV_fill(cumopsDV, 0.0) ;
/*
-------------------------------
create the owners map IV object
-------------------------------
*/
switch ( maptype ) {
case 1 :
frontOwnersIV = ETree_wrapMap(frontETree, type,
SPOOLES_SYMMETRIC, cumopsDV) ;
break ;
case 2 :
frontOwnersIV = ETree_balancedMap(frontETree, type,
SPOOLES_SYMMETRIC, cumopsDV) ;
break ;
case 3 :
frontOwnersIV = ETree_subtreeSubsetMap(frontETree, type,
SPOOLES_SYMMETRIC, cumopsDV) ;
break ;
case 4 :
frontOwnersIV = ETree_ddMap(frontETree, type,
SPOOLES_SYMMETRIC, cumopsDV, cutoff) ;
break ;
}
if ( msglvl > 1 ) {
fprintf(msgFile, "\n\n totalOps = %.0f", DV_sum(cumopsDV)) ;
DVscale(DV_size(cumopsDV), DV_entries(cumopsDV),
nthread/DV_sum(cumopsDV)) ;
fprintf(msgFile, "\n\n cumopsDV") ;
DV_writeForHumanEye(cumopsDV, msgFile) ;
}
if ( msglvl > 1 ) {
fprintf(msgFile, "\n\n frontOwnersIV") ;
IV_writeForHumanEye(frontOwnersIV, msgFile) ;
fflush(msgFile) ;
}
/*
------------------------------
initialize the FrontMtx object
------------------------------
*/
MARKTIME(t1) ;
mtxmanager = SubMtxManager_new() ;
SubMtxManager_init(mtxmanager, LOCK_IN_PROCESS, 0) ;
frontmtx = FrontMtx_new() ;
if ( type == SPOOLES_REAL ) {
FrontMtx_init(frontmtx, frontETree, symbfacIVL, type,
SPOOLES_SYMMETRIC, FRONTMTX_DENSE_FRONTS,
SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS,
0, NULL, mtxmanager, msglvl, msgFile) ;
} else if ( type == SPOOLES_COMPLEX ) {
FrontMtx_init(frontmtx, frontETree, symbfacIVL, type,
SPOOLES_HERMITIAN, FRONTMTX_DENSE_FRONTS,
SPOOLES_NO_PIVOTING, LOCK_IN_PROCESS,
0, NULL, mtxmanager, msglvl, msgFile) ;
}
MARKTIME(t2) ;
fprintf(msgFile, "\n CPU %8.3f : initialize the front matrix",
t2 - t1) ;
fprintf(msgFile, "\n\n %d entries in D, %d entries in U",
frontmtx->nentD, frontmtx->nentU) ;
/*
-----------------
factor the matrix
-----------------
*/
DVzero(10, cpus) ;
InpMtx_changeCoordType(mtxA, INPMTX_BY_ROWS) ;
InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS) ;
chvmanager = ChvManager_new() ;
ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1) ;
MARKTIME(t1) ;
FrontMtx_MT_QR_factor(frontmtx, mtxA, chvmanager, frontOwnersIV,
cpus, &factorops, msglvl, msgFile) ;
MARKTIME(t2) ;
fprintf(msgFile,
"\n CPU %8.3f : FrontMtx_MT_QR_factor, %.0f ops, %.2f mflops",
t2 - t1, factorops, 1.e-6*factorops/(t2-t1)) ;
cputotal = cpus[6] ;
if ( cputotal > 0.0 ) {
fprintf(msgFile, "\n"
"\n CPU %%"
"\n setup factorization %8.3f %6.2f"
"\n setup fronts %8.3f %6.2f"
"\n factor fronts %8.3f %6.2f"
"\n store factor %8.3f %6.2f"
"\n store update %8.3f %6.2f"
"\n miscellaneous %8.3f %6.2f"
"\n total time %8.3f"
"\n wall clock time %8.3f",
cpus[0], 100.*cpus[0]/cputotal,
cpus[1], 100.*cpus[1]/cputotal,
cpus[2], 100.*cpus[2]/cputotal,
cpus[3], 100.*cpus[3]/cputotal,
cpus[4], 100.*cpus[4]/cputotal,
cpus[5], 100.*cpus[5]/cputotal,
cpus[6], t2 - t1) ;
}
fprintf(msgFile, "\n\n ChvManager statistics") ;
ChvManager_writeForHumanEye(chvmanager, msgFile) ;
/*
------------------------------
post-process the factor matrix
------------------------------
*/
MARKTIME(t1) ;
FrontMtx_postProcess(frontmtx, msglvl, msgFile) ;
MARKTIME(t2) ;
fprintf(msgFile, "\n\n CPU %8.3f : post-process the matrix", t2 - t1) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n front factor matrix after post-processing") ;
FrontMtx_writeForHumanEye(frontmtx, msgFile) ;
}
fprintf(msgFile, "\n\n after post-processing") ;
SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
/*
----------------
solve the system
----------------
*/
mtxZ = DenseMtx_new() ;
DenseMtx_init(mtxZ, type, 0, 0, neqns, nrhs, 1, neqns) ;
DenseMtx_zero(mtxZ) ;
if ( type == SPOOLES_REAL ) {
nops = frontmtx->nentD + 2*frontmtx->nentU ;
if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
nops += 2*frontmtx->nentL ;
} else {
nops += 2*frontmtx->nentU ;
}
} else if ( type == SPOOLES_COMPLEX ) {
nops = 8*frontmtx->nentD + 8*frontmtx->nentU ;
if ( FRONTMTX_IS_NONSYMMETRIC(frontmtx) ) {
nops += 8*frontmtx->nentL ;
} else {
nops += 8*frontmtx->nentU ;
}
}
nops *= nrhs ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n rhs") ;
DenseMtx_writeForHumanEye(mtxB, msgFile) ;
fflush(stdout) ;
}
DVzero(7, cpus) ;
MARKTIME(t1) ;
FrontMtx_QR_solve(frontmtx, mtxA, mtxZ, mtxB, mtxmanager,
cpus, msglvl, msgFile) ;
MARKTIME(t2) ;
fprintf(msgFile,
"\n\n CPU %8.3f : serial solve, %d rhs, %.0f ops, %.3f mflops",
t2 - t1, nrhs, nops, 1.e-6*nops/(t2 - t1)) ;
cputotal = t2 - t1 ;
if ( cputotal > 0.0 ) {
fprintf(msgFile, "\n"
"\n CPU %%"
"\n A^TB matrix-matrix multiply %8.3f %6.2f"
"\n total solve time %8.3f %6.2f"
"\n set up solves %8.3f %6.2f"
"\n load rhs and store solution %8.3f %6.2f"
"\n forward solve %8.3f %6.2f"
"\n diagonal solve %8.3f %6.2f"
"\n backward solve %8.3f %6.2f"
"\n total QR solve time %8.3f",
cpus[6], 100.*cpus[6]/cputotal,
cpus[5], 100.*cpus[5]/cputotal,
cpus[0], 100.*cpus[0]/cputotal,
cpus[1], 100.*cpus[1]/cputotal,
cpus[2], 100.*cpus[2]/cputotal,
cpus[3], 100.*cpus[3]/cputotal,
cpus[4], 100.*cpus[4]/cputotal,
cputotal) ;
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n serial solve computed solution") ;
DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
fflush(stdout) ;
}
/*
-----------------
compute the error
-----------------
*/
DenseMtx_sub(mtxZ, mtxX) ;
fprintf(msgFile, "\n\n serial solve: maxabs error = %12.4e",
DenseMtx_maxabs(mtxZ)) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n error") ;
DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
fflush(stdout) ;
}
fprintf(msgFile, "\n\n after serial solve") ;
SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
/*
--------------------------------
now solve the system in parallel
--------------------------------
*/
solvemap = SolveMap_new() ;
SolveMap_ddMap(solvemap, SPOOLES_SYMMETRIC,
FrontMtx_upperBlockIVL(frontmtx), NULL,
nthread, frontOwnersIV, frontmtx->tree,
seed, msglvl, msgFile) ;
fprintf(msgFile, "\n solve map created") ;
fflush(msgFile) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n rhs") ;
DenseMtx_writeForHumanEye(mtxB, msgFile) ;
fflush(stdout) ;
}
DenseMtx_zero(mtxZ) ;
MARKTIME(t1) ;
FrontMtx_MT_QR_solve(frontmtx, mtxA, mtxZ, mtxB, mtxmanager,
solvemap, cpus, msglvl, msgFile) ;
MARKTIME(t2) ;
fprintf(msgFile,
"\n\n CPU %8.3f : parallel solve, %d rhs, %.0f ops, %.3f mflops",
t2 - t1, nrhs, nops, 1.e-6*nops/(t2 - t1)) ;
cputotal = t2 - t1 ;
if ( cputotal > 0.0 ) {
fprintf(msgFile, "\n"
"\n CPU %%"
"\n A^TB matrix-matrix multiply %8.3f %6.2f"
"\n total solve time %8.3f %6.2f"
"\n set up solves %8.3f %6.2f"
"\n load rhs and store solution %8.3f %6.2f"
"\n forward solve %8.3f %6.2f"
"\n diagonal solve %8.3f %6.2f"
"\n backward solve %8.3f %6.2f"
"\n total QR solve time %8.3f",
cpus[6], 100.*cpus[6]/cputotal,
cpus[5], 100.*cpus[5]/cputotal,
cpus[0], 100.*cpus[0]/cputotal,
cpus[1], 100.*cpus[1]/cputotal,
cpus[2], 100.*cpus[2]/cputotal,
cpus[3], 100.*cpus[3]/cputotal,
cpus[4], 100.*cpus[4]/cputotal,
cputotal) ;
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n computed solution from parallel solve") ;
DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
fflush(stdout) ;
}
/*
-----------------
compute the error
-----------------
*/
DenseMtx_sub(mtxZ, mtxX) ;
fprintf(msgFile, "\n\n parallel solve: maxabs error = %12.4e",
DenseMtx_maxabs(mtxZ)) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n error") ;
DenseMtx_writeForHumanEye(mtxZ, msgFile) ;
fflush(stdout) ;
}
fprintf(msgFile, "\n\n after parallel solve") ;
SubMtxManager_writeForHumanEye(frontmtx->manager, msgFile) ;
/*
------------------------
free the working storage
------------------------
*/
InpMtx_free(mtxA) ;
DenseMtx_free(mtxX) ;
DenseMtx_free(mtxZ) ;
DenseMtx_free(mtxB) ;
FrontMtx_free(frontmtx) ;
IVL_free(symbfacIVL) ;
ETree_free(frontETree) ;
SubMtxManager_free(mtxmanager) ;
ChvManager_free(chvmanager) ;
DV_free(cumopsDV) ;
IV_free(frontOwnersIV) ;
SolveMap_free(solvemap) ;
fprintf(msgFile, "\n") ;
fclose(msgFile) ;
return(1) ; }
/*--------------------------------------------------------------------*/
syntax highlighted by Code2HTML, v. 0.9.1