/* bgmresl.c */
#include "../Iter.h"
/*--------------------------------------------------------------------*/
/*
---------------------------------------------------------------------
purpose -- to solve the system A X = B using the block GMRES
Arnoldi iteration method with left preconditioning
neqns -- # of equations
type -- must be SPOOLES_REAL or SPOOLES_COMPLEX
symmetryflag -- must be SPOOLES_SYMMETRIC or SPOOLES_NONSYMMETRIC
mtxA -- pointer to object for A
mtxM -- pointer to object for M, may be NULL
mtxX -- pointer to object for X, zero on input
mtxB -- pointer to object for B
maxnouter -- maximum number of outer iterations
maxninner -- maximum number of inner iterations
pninner -- the last # of inner iterations executed
pnouter -- the last # of outer iterations executed
convergetol -- tolerance for convergence
convergence achieved when ||r||_f <= convergetol * ||r_0||_f
msglvl -- message level
1 -- no output
2 -- iteration #, residual, ratio
3 -- scalar information
4 -- scalar and vector information
msgFile -- message file
return value ---
1 -- normal return
-1 -- neqns <= 0
-2 -- type invalid
-3 -- symmetryflag invalid
-4 -- mtxA NULL
-5 -- mtxX NULL
-6 -- mtxB NULL
-7 -- convergetol invalid
-8 -- maxnouter and/or maxninner invalid
-9 -- pnouter NULL
-10 -- pninner NULL
-11 -- msglvl > 0 and msgFile = NULL
created -- 98dec03, dkw
---------------------------------------------------------------------
*/
int
bgmresl (
int neqns,
int type,
int symmetryflag,
InpMtx *mtxA,
FrontMtx *mtxM,
DenseMtx *mtxX,
DenseMtx *mtxB,
int maxnouter,
int maxninner,
int *pnouter,
int *pninner,
double convergetol,
int msglvl,
FILE *msgFile
) {
A2 *A2G, *A2Gi, *A2Gj, *A2H, *A2Hij, *A2Hkj, *A2H11, *A2H12,
*A2Vj, *A2Vk, *A2Z ;
DenseMtx *G, *Gj, *H, *Hij, *Hkj, *V, *Vi, *Vj, *Vk, *Z;
double Hkk, Hik, initResNorm, ops, resnorm, t0, t1 ;
double cpus[9], minusone[2] = {-1.0, 0.0}, one[2] = {1.0, 0.0},
zero[2] = {0.0, 0.0} ;
double *col, *val, *Y ;
DV workDV ;
int converged, i, ii, iinner, jinner, jouter, kinner,
nrhs, nstep,
hijfrow, hijlrow, hijfcol, hijlcol, hkjfcol, hkjlcol,
iend, inc1, inc2, irow, istart, jcol, ncol, ndiag, nrow,
rc, vifcol, vilcol, vjfcol, vjlcol, vkfcol, vklcol ;
/*
---------------
check the input
---------------
*/
MARKTIME(t0) ;
if ( neqns <= 0 ) {
fprintf(stderr, "\n error in bgmresl()"
"\n neqns = %d\n", neqns) ;
return(-1) ;
}
if ( type != SPOOLES_REAL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n type = %d, must be SPOOLES_REAL\n", type) ;
return(-2) ;
}
if ( symmetryflag != SPOOLES_SYMMETRIC
&& symmetryflag != SPOOLES_NONSYMMETRIC ) {
fprintf(stderr, "\n error in bgmresl()"
"\n symmetryflag = %d"
"\n must be SPOOLES_SYMMETRIC or SPOOLES_NONSYMMETRIC\n",
symmetryflag) ;
return(-3) ;
}
if ( mtxA == NULL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n mtxA is NULL\n") ;
return(-4) ;
}
if ( mtxX == NULL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n mtxX is NULL\n") ;
return(-5) ;
}
if ( mtxB == NULL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n mtxB is NULL\n") ;
return(-6) ;
}
if ( convergetol < 0.0 ) {
fprintf(stderr, "\n error in bgmresl()"
"\n convergetol is %e\n", convergetol) ;
return(-7) ;
}
if ( maxnouter <= 0 || maxninner <= 0 ) {
fprintf(stderr, "\n error in bgmresl()"
"\n maxnouter %d, maxninner %d\n", maxnouter, maxninner) ;
return(-8) ;
}
if ( pnouter == NULL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n pnouter is NULL\n") ;
return(-9) ;
}
if ( pninner == NULL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n pninner is NULL\n") ;
return(-10) ;
}
if ( msglvl > 0 && msgFile == NULL ) {
fprintf(stderr, "\n error in bgmresl()"
"\n msglvl = %d and msgFile is NULL\n", msglvl) ;
return(-11) ;
}
if ( msglvl > 1 ) {
fprintf(msgFile,
"\n\n maximum # of outer iterations = %d"
"\n maximum # of inner iterations = %d",
maxnouter, maxninner) ;
fflush(msgFile) ;
}
/*
-------------------------------------------------
check for zero rhs (if B is zero, then X is zero)
-------------------------------------------------
*/
initResNorm = DenseMtx_frobNorm(mtxB) ;
if ( msglvl > 1 ) {
fprintf(msgFile, "\n\n initial residual = %12.4e", initResNorm) ;
fflush(msgFile) ;
}
if ( initResNorm == 0.0 ) {
*pnouter = *pninner = 0 ;
return(1) ;
}
/*
---------------------------
initialize the working data
---------------------------
*/
nrhs = mtxB->ncol ;
Y = DVinit(maxninner+1, 0.0) ;
DV_setDefaultFields(&workDV) ;
H = DenseMtx_new() ;
DenseMtx_init(H, SPOOLES_REAL, 0, 0,
(maxninner+1)*nrhs, maxninner*nrhs, 1, (maxninner+1)*nrhs) ;
DenseMtx_zero(H) ;
V = DenseMtx_new() ;
DenseMtx_init(V, SPOOLES_REAL, 0, 0, neqns, nrhs*(maxninner+1), 1, neqns) ;
DenseMtx_zero(V) ;
G = DenseMtx_new() ;
DenseMtx_init(G, SPOOLES_REAL, 0, 0,
(maxninner+1)*nrhs, nrhs, 1, (maxninner+1)*nrhs) ;
DenseMtx_zero(G) ;
Z = DenseMtx_new() ;
DenseMtx_init(Z, SPOOLES_REAL, 0, 0, neqns, nrhs, 1, neqns) ;
DenseMtx_zero(Z) ;
Vi = DenseMtx_new() ;
Vj = DenseMtx_new() ;
Vk = DenseMtx_new() ;
Gj = DenseMtx_new() ;
Hij = DenseMtx_new() ;
Hkj = DenseMtx_new() ;
A2G = A2_new() ;
A2H = A2_new() ;
A2Z = A2_new() ;
DenseMtx_setA2(Z, A2Z) ;
/*
---------------------------
main loop of the iterations
---------------------------
*/
converged = 0 ;
for ( jouter = 0 ; jouter < maxnouter ; jouter++ ) {
A2Gi = A2_new() ;
A2Vj = A2_new() ;
DenseMtx_zero(H) ;
DenseMtx_zero(V) ;
DenseMtx_zero(G) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n outer iteration %d", jouter) ;
fflush(msgFile) ;
}
/*
-----------------------------------------------------
compute the preconditioned residual and load into V_0
1. compute W = B - A*X
2. solve M Z = W
3. set W = Z
-----------------------------------------------------
*/
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n B") ;
DenseMtx_writeForHumanEye(mtxB, msgFile) ;
fflush(msgFile) ;
}
for ( i = 0 ; i < nrhs ; i++ )
DenseMtx_colCopy(Z, i, mtxB, i) ;
if ( symmetryflag == SPOOLES_SYMMETRIC ) {
InpMtx_sym_mmm(mtxA, Z, minusone, mtxX) ;
} else {
InpMtx_nonsym_mmm(mtxA, Z, minusone, mtxX) ;
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n ||B - A*X||_f = %12.4e",
DenseMtx_frobNorm(Z)) ;
fflush(msgFile) ;
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n B - A*X") ;
DenseMtx_writeForHumanEye(Z, msgFile) ;
fflush(msgFile) ;
}
if ( mtxM != NULL ) {
FrontMtx_solve(mtxM, Z, Z, mtxM->manager,
cpus, 0, msgFile) ;
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n ||M^{-1}(B - A*X)||_f = %12.4e",
DenseMtx_frobNorm(Z)) ;
fflush(msgFile) ;
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n M^{-1}(B - A*X)") ;
DenseMtx_writeForHumanEye(Z, msgFile) ;
fflush(msgFile) ;
}
/*
---------------------------------------------------
Compute the QR factorization of the initial
residual to get our starting orthogonal vectors V_0
and our top block in G
---------------------------------------------------
*/
ops = A2_QRreduce(A2Z, &workDV, msglvl, msgFile) ;
rc = DenseMtx_initAsSubmatrix(Vj, V, 0, neqns-1, 0, nrhs-1) ;
DenseMtx_setA2(Vj, A2Vj) ;
A2_computeQ(A2Vj, A2Z, &workDV, msglvl, msgFile) ;
DenseMtx_initAsSubmatrix(Gj, G, 0, 2*nrhs-1, 0, nrhs-1) ;
DenseMtx_setA2(Gj, A2Gi) ;
nrow = A2Z->n1 ;
ncol = A2Z->n2 ;
inc1 = A2Z->inc1 ;
inc2 = A2Z->inc2 ;
if ( nrow >= ncol ) {
ndiag = ncol ;
} else {
ndiag = nrow ;
}
for ( jcol = 0, col = A2Z->entries, val = A2Gi->entries ;
jcol < ncol ;
jcol++, col += inc2, val += inc2 ) {
istart = 0 ;
iend = (jcol < ndiag) ? jcol : ndiag - 1 ;
for ( irow = istart, ii = irow*inc1 ;
irow <= iend ;
irow++, ii += inc1 ) {
val[ii] = col[ii] ;
}
}
resnorm = DenseMtx_frobNorm(G) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n V_%d = ", jouter) ;
A2_writeForHumanEye(A2Vj, msgFile) ;
fprintf(msgFile, "\n\n G_%d%d = ", jouter, jouter) ;
A2_writeForHumanEye(A2Gi, msgFile) ;
fflush(msgFile) ;
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n ||G||_f = %12.4e", resnorm) ;
fflush(msgFile) ;
}
if ( resnorm == 0.0 ) {
break ;
}
if ( jouter == 0 ) {
initResNorm = resnorm ;
}
/*
------------------------------------------------------------
loop over the inner gmres iterations using Arnoldi iteration
------------------------------------------------------------
*/
nstep = maxninner - 1 ;
for ( jinner = 0 ; jinner < maxninner ; jinner++ ) {
A2Gj = A2_new() ;
A2Hij = A2_new() ;
A2Hkj = A2_new() ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n inner iteration %d", jinner) ;
fflush(msgFile) ;
}
/*
----------------------------
compute W = M^{-1} * A * V_j
----------------------------
*/
DenseMtx_zero(Z) ;
vjfcol = jinner*nrhs ;
vjlcol = vjfcol+nrhs-1 ;
DenseMtx_initAsSubmatrix(Vj, V, 0, neqns-1, vjfcol, vjlcol) ;
if ( symmetryflag == SPOOLES_SYMMETRIC ) {
InpMtx_sym_mmm(mtxA, Z, one, Vj) ;
} else {
InpMtx_nonsym_mmm(mtxA, Z, one, Vj) ;
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n ||A*V_%d||_f = %12.4e",
jinner, DenseMtx_frobNorm(Z)) ;
fflush(msgFile) ;
}
if ( mtxM != NULL ) {
FrontMtx_solve(mtxM, Z, Z, mtxM->manager,
cpus, 0, msgFile) ;
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n ||M^{-1}*A*V_%d||_f = %12.4e",
jinner, DenseMtx_frobNorm(Z)) ;
fflush(msgFile) ;
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n M^{-1}*A*V_%d", jinner) ;
DenseMtx_writeForHumanEye(Z, msgFile) ;
fflush(msgFile) ;
}
for ( iinner = 0 ; iinner <= jinner ; iinner++ ) {
vifcol = iinner*nrhs ;
vilcol = vifcol+nrhs-1 ;
DenseMtx_initAsSubmatrix(Vi, V, 0, neqns-1, vifcol, vilcol) ;
hijfrow = vifcol ;
hijlrow = vilcol ;
hijfcol = jinner*nrhs ;
hijlcol = hijfcol+nrhs-1 ;
DenseMtx_initAsSubmatrix(Hij, H, hijfrow, hijlrow, hijfcol, hijlcol) ;
DenseMtx_mmm("t", "n", zero, Hij, one, Vi, Z) ;
DenseMtx_mmm("n", "n", one, Z, minusone, Vi, Hij) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n H_%d%d = ", iinner, jinner) ;
DenseMtx_writeForHumanEye(Hij, msgFile) ;
fflush(msgFile) ;
}
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n Wj after Arnoldi iteration") ;
DenseMtx_writeForHumanEye(Z, msgFile) ;
fflush(msgFile) ;
}
/*
-----------------------------------------------
compute V_jinner+1 and H_jinner+1,jinner blocks
using QR decomposition of W_j then
copy R from A2Z(1:ncolA,1:ncolA) into H_jinner+1,jinner
-----------------------------------------------
*/
ops = A2_QRreduce(A2Z, &workDV, msglvl, msgFile) ;
vkfcol = vjlcol+1 ;
vklcol = vkfcol+nrhs-1 ;
DenseMtx_initAsSubmatrix(Vk, V, 0, neqns-1, vkfcol, vklcol) ;
A2Vk = A2_new() ;
DenseMtx_setA2(Vk, A2Vk) ;
A2_computeQ(A2Vk, A2Z, &workDV, msglvl, msgFile) ;
A2_free(A2Vk) ;
hijfrow = hijlrow+1 ;
hijlrow = hijfrow+nrhs-1 ;
DenseMtx_initAsSubmatrix(Hkj, H, hijfrow, hijlrow, hijfcol, hijlcol) ;
DenseMtx_setA2(Hkj, A2Hkj) ;
nrow = A2Z->n1 ;
ncol = A2Z->n2 ;
inc1 = A2Z->inc1 ;
inc2 = A2Z->inc2 ;
if ( nrow >= ncol ) {
ndiag = ncol ;
} else {
ndiag = nrow ;
}
for ( jcol = 0, col = A2Z->entries, val = A2Hkj->entries ;
jcol < ncol ;
jcol++, col += inc2, val += inc2 ) {
istart = 0 ;
iend = (jcol < ndiag) ? jcol : ndiag - 1 ;
for ( irow = istart, ii = irow*inc1 ;
irow <= iend ;
irow++, ii += inc1 ) {
val[ii] = col[ii] ;
}
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n V_%d = ", jinner+1) ;
DenseMtx_writeForHumanEye(Vk, msgFile) ;
fprintf(msgFile, "\n\n H_%d%d = ", jinner+1, jinner) ;
DenseMtx_writeForHumanEye(Hij, msgFile) ;
fflush(msgFile) ;
}
/*
-------------------------------------------------
Before starting the next iteration, triangulate
the Hessenberg matrix. To do this we must first
apply the previous triangulating transformations
to the newly constructed columns and then
triangulate the new portion of the Hessenberg
matrix. When we compute these new transformations
we need to also apply them to the corresponding
portion of G to keep track of the norm of the
current residual.
First apply the previous transformations
-------------------------------------------------
*/
hijfrow = 0 ;
hijlrow = 2*nrhs-1 ;
hkjfcol = 0 ;
hkjlcol = nrhs-1 ;
for ( iinner = 0 ; iinner <= jinner-1 ; iinner++ ) {
A2H11 = A2_new() ;
A2H12 = A2_new() ;
DenseMtx_initAsSubmatrix(Hij, H, hijfrow, hijlrow, hkjfcol, hkjlcol) ;
DenseMtx_setA2(Hij, A2H11) ;
DenseMtx_initAsSubmatrix(Hkj, H, hijfrow, hijlrow, hijfcol, hijlcol) ;
DenseMtx_setA2(Hkj, A2H12) ;
A2_applyQT(A2H12, A2H11, A2H12, &workDV, msglvl, msgFile) ;
hijfrow += nrhs ;
hijlrow += nrhs ;
hkjfcol += nrhs ;
hkjlcol += nrhs ;
A2_free(A2H11) ;
A2_free(A2H12) ;
}
/*
-------------------------------------------------
Compute the QR factorization of the diagonal and
subdiagonal blocks of the block Hessenberg matrix
-------------------------------------------------
*/
DenseMtx_initAsSubmatrix(Hij, H, hijfrow, hijlrow, hijfcol, hijlcol) ;
DenseMtx_setA2(Hij, A2Hij) ;
ops = A2_QRreduce(A2Hij, &workDV, msglvl, msgFile) ;
/*
----------------------------------------------------------
apply the transformation to the corresponding section of G
----------------------------------------------------------
*/
DenseMtx_initAsSubmatrix(Gj, G, hijfrow, hijlrow, 0, nrhs-1) ;
DenseMtx_setA2(Gj, A2Gj) ;
A2_applyQT(A2Gj, A2Hij, A2Gj, &workDV, msglvl, msgFile) ;
DenseMtx_initAsSubmatrix(Gj, G, hijfrow+nrhs, hijlrow, 0, nrhs-1) ;
resnorm = DenseMtx_frobNorm(Gj) ;
if ( msglvl > 1 ) {
fprintf(msgFile, "\n outer %d, inner %d, res = %24.16e",
jouter, jinner, resnorm) ;
fflush(msgFile) ;
}
/*
-----------------
check convergence
-----------------
*/
if ( resnorm <= convergetol * initResNorm ) {
if ( msglvl > 1 ) {
fprintf(msgFile, "\n BGMRESL convergence has been achieved") ;
fprintf(msgFile, "\n ***convergetol = %24.16e", convergetol) ;
fprintf(msgFile, "\n ***initResNorm = %24.16e", initResNorm) ;
fflush(msgFile) ;
}
/*
------------------------------------------------
convergence has been achieved, break out of loop
------------------------------------------------
*/
nstep = jinner ;
converged = 1 ;
A2_free(A2Gj) ;
A2_free(A2Hij) ;
A2_free(A2Hkj) ;
break ;
}
A2_free(A2Gj) ;
A2_free(A2Hij) ;
A2_free(A2Hkj) ;
}
/*
----------------------------------
for each rhs copy Gi into vector Y
solve Y := H^{-1} Y
and store Y back into Gi
----------------------------------
*/
DenseMtx_setA2(H, A2H) ;
DenseMtx_setA2(G, A2G) ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n H") ;
A2H->n1 = nstep+1 ;
A2H->n2 = nstep+1 ;
A2_writeForHumanEye(A2H, msgFile) ;
A2G->n1 = nstep+1 ;
fprintf(msgFile, "\n\n G") ;
A2_writeForHumanEye(A2G, msgFile) ;
A2H->n1 = maxninner+1 ;
A2H->n2 = maxninner+1 ;
fprintf(msgFile, "\n\n before solve, Y") ;
fprintf(msgFile, "\n\n before solve ykinner = %24.16e ",Y[7]) ;
DVfprintf(msgFile, nstep+1, Y) ;
fflush(msgFile) ;
}
for ( i = 0 ; i < nrhs ; i++ ) {
A2_extractColumn(A2G, Y, i) ;
for ( kinner = nstep ; kinner >= 0 ; kinner-- ) {
A2_realEntry(A2H, kinner, kinner, &Hkk) ;
Y[kinner] = Y[kinner] / Hkk ;
for ( iinner = 0 ; iinner < kinner ; iinner++ ) {
A2_realEntry(A2H, iinner, kinner, &Hik) ;
Y[iinner] -= Y[kinner] * Hik ;
}
}
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n after solve, Y") ;
DVfprintf(msgFile, nstep+1, Y) ;
fflush(msgFile) ;
}
A2_setColumn(A2G, Y, i) ;
}
/*
-------------------------------------
compute the new solution X := X + V*Y
-------------------------------------
*/
DenseMtx_mmm("n", "n", one, mtxX, one, V, G) ;
if ( msglvl > 2 ) {
resnorm = DenseMtx_frobNorm(mtxX) ;
fprintf(msgFile, "\n ||X||_f = %24.16e", resnorm) ;
fflush(msgFile) ;
}
if ( msglvl > 3 ) {
fprintf(msgFile, "\n\n X") ;
DenseMtx_writeForHumanEye(mtxX, msgFile) ;
fflush(msgFile) ;
}
if ( converged == 1 ) {
break ;
}
A2_free(A2Gi) ;
A2_free(A2Vj) ;
}
/*
-----------
return info
-----------
*/
*pnouter = jouter + 1 ;
*pninner = jinner + 1 ;
if ( msglvl > 2 ) {
fprintf(msgFile, "\n\n before leaving, *pnouter = %d, *pinner = %d",
*pnouter, *pninner) ;
fflush(msgFile) ;
}
/*
------------------------
free the working storage
------------------------
*/
A2_free(A2G) ;
A2_free(A2H) ;
A2_free(A2Z) ;
A2_free(A2Gi) ;
A2_free(A2Vj) ;
DenseMtx_free(Gj) ;
DenseMtx_free(G) ;
DenseMtx_free(Hij) ;
DenseMtx_free(Hkj) ;
DenseMtx_free(H) ;
DenseMtx_free(Vi) ;
DenseMtx_free(Vj) ;
DenseMtx_free(Vk) ;
DenseMtx_free(V) ;
DenseMtx_free(Z) ;
DV_clearData(&workDV) ;
DVfree(Y) ;
return(1) ; }
/*--------------------------------------------------------------------*/
syntax highlighted by Code2HTML, v. 0.9.1