#include "BSprivate.h"
/*+ BSpar_symmlq - SYMMLQ; reverse communication
Input Parameters:
. num_cols - the length of the vector on this processor
. rhs - the rhs on this processor
. x - the solution vector on this processor
. resid - the residual vector on this processor
. z - the work vector on this processor
. v - the work vector on this processor
. w - the work vector on this processor
. r2 - the work vector on this processor
. beta thru conv - symmlq values that aren't of much interest
. cur_step - the current cg step
. cur_phase - the current cg phase
. procinfo - the usual processor stuff
Output Parameters:
. many of the input parameters are changed, it is just the usual
SYMMLQ algorithm
Returns:
The action of the calling program depends on the return value
SYMM_MATVECXR: multiply A*x and put the results in resid
SYMM_MATVECVZ: multiply A*v and put the results in z
SYMM_MSOLVE1: multiply A(-1)*resid and put the results in z
SYMM_MSOLVE2: multiply A(-1)*r2 and put the results in z
SYMM_MSOLVEB: multiply A(-1)*rhs and put the results in z
SYMM_ERROR: error during SYMMLQ
SYMM_DONE: SYMMLQ is finished, convergence has occurred
Notes:
The first time that BSpar_bcg is called, cur_step and
cur_phase should be 0 and conv should be FALSE.
+*/
int BSpar_symmlq(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
FLOAT *v, FLOAT *w, FLOAT *r2, FLOAT *cg_beta, FLOAT *cg_alpha, FLOAT *obeta,
FLOAT *gbar, FLOAT *dbar, FLOAT *rhs1, FLOAT *rhs2, FLOAT *bstep,
FLOAT *snprod, FLOAT *beta1, FLOAT *tnorm, FLOAT *ynorm2, FLOAT *bestnm,
int *cgpt, int conv, int *cur_step, int *cur_phase, BSprocinfo *procinfo)
{
int i;
FLOAT t1, t2;
FLOAT s;
FLOAT gamma, cs, sn, delta, epsln;
FLOAT ynorm, anorm, epsa, gpert, diag;
FLOAT elqnrm, qrnorm, cgnorm;
FLOAT zbar;
FLOAT eps;
char EPS = 'E';
eps = DLAMCH(&EPS);
switch((*cur_phase)) {
case 0: {
/* compute A*x=r */
(*cur_phase) = 1;
(*cur_step) = 0;
return(SYMM_MATVECXR);
}
case 1: {
/* compute r and then z = M(-1)*r */
for (i=0;i<num_cols;i++) {
resid[i] = rhs[i] - resid[i];
}
(*cur_phase) = 2;
return(SYMM_MSOLVE1);
}
case 2: {
/* compute beta1 and then z = A*v */
(*cg_beta) = BSpar_ip(num_cols,z,resid,procinfo); CHKERRN(0);
if ((*cg_beta) < 0.0) return(SYMM_ERROR);
(*cg_beta) = sqrt((*cg_beta));
(*beta1) = (*cg_beta);
s = 1.0/(*cg_beta);
for (i=0;i<num_cols;i++) {
v[i] = s*z[i];
}
(*cur_phase) = 3;
return(SYMM_MATVECVZ);
}
case 3: {
/* compute cg_alpha, z, r2, and then z = M(-1)*r2 */
(*cg_alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
s = -(*cg_alpha)/(*cg_beta);
for (i=0;i<num_cols;i++) {
z[i] += s*resid[i];
}
t1 = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
t2 = BSpar_ip(num_cols,v,v,procinfo); CHKERRN(0);
s = -t1/t2;
for (i=0;i<num_cols;i++) {
z[i] += s*v[i];
}
for (i=0;i<num_cols;i++) {
r2[i] = z[i];
}
(*cur_phase) = 4;
return(SYMM_MSOLVE2);
}
case 4: {
/* compute beta */
(*obeta) = (*cg_beta);
(*cg_beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERRN(0);
if ((*cg_beta) < 0.0) return(SYMM_ERROR);
(*cg_beta) = sqrt((*cg_beta));
/* initialize accumulators */
(*gbar) = (*cg_alpha);
(*dbar) = (*cg_beta);
(*bstep) = 0.0;
(*snprod) = 1.0;
(*rhs1) = (*beta1);
(*rhs2) = 0.0;
(*tnorm) = 0.0;
(*ynorm2) = 0.0;
for (i=0;i<num_cols;i++) {
w[i] = 0.0;
}
/* estimate norms */
(*tnorm) += (*cg_alpha)*(*cg_alpha) + 2.0*(*cg_beta)*(*cg_beta);
anorm = sqrt(*tnorm);
ynorm = sqrt(*ynorm2);
epsa = anorm*eps;
gpert = (*gbar) + epsa;
if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
diag = fabs(gpert);
elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
qrnorm = (*snprod)*(*beta1);
cgnorm = qrnorm * (*cg_beta)/diag;
if (cgnorm < elqnrm) {
(*cgpt) = TRUE;
(*bestnm) = cgnorm;
} else {
(*cgpt) = FALSE;
(*bestnm) = elqnrm;
}
/* compute v and then z = A*v */
s = 1.0/(*cg_beta);
for (i=0;i<num_cols;i++) {
v[i] = s*z[i];
}
(*cur_phase) = 6;
return(SYMM_MATVECVZ);
}
case 5: {
/* compute beta */
(*obeta) = (*cg_beta);
(*cg_beta) = BSpar_ip(num_cols,z,r2,procinfo); CHKERRN(0);
if ((*cg_beta) < 0.0) return(SYMM_ERROR);
(*cg_beta) = sqrt((*cg_beta));
/* get x */
gamma = sqrt((*gbar)*(*gbar)+(*obeta)*(*obeta));
cs = (*gbar)/gamma;
sn = (*obeta)/gamma;
delta = cs*(*dbar) + sn*(*cg_alpha);
(*gbar) = sn*(*dbar) - cs*(*cg_alpha);
epsln = sn*(*cg_beta);
(*dbar) = -cs*(*cg_beta);
s = (*rhs1)/gamma;
t1 = s*cs;
t2 = s*sn;
for (i=0;i<num_cols;i++) {
x[i] = (w[i]*t1 + v[i]*t2) + x[i];
w[i] = w[i]*sn - v[i]*cs;
}
(*rhs1) = (*rhs2) - delta*s;
(*rhs2) = -epsln*s;
(*bstep) = (*snprod)*cs*s + (*bstep);
(*snprod) = (*snprod)*sn;
(*ynorm2) += s*s;
/* estimate norms */
(*tnorm) += (*cg_alpha)*(*cg_alpha) + 2.0*(*cg_beta)*(*cg_beta);
anorm = sqrt(*tnorm);
ynorm = sqrt(*ynorm2);
epsa = anorm*eps;
gpert = (*gbar) + epsa;
if ((*gbar) < 0.0) gpert = (*gbar) - epsa;
diag = fabs(gpert);
elqnrm = sqrt((*rhs1)*(*rhs1)+(*rhs2)*(*rhs2));
qrnorm = (*snprod)*(*beta1);
cgnorm = qrnorm * (*cg_beta)/diag;
if (cgnorm < elqnrm) {
(*cgpt) = TRUE;
(*bestnm) = cgnorm;
} else {
(*cgpt) = FALSE;
(*bestnm) = elqnrm;
}
/* compute v and then z = A*v */
s = 1.0/(*cg_beta);
for (i=0;i<num_cols;i++) {
v[i] = s*z[i];
}
(*cur_phase) = 6;
(*cur_step)++;
return(SYMM_MATVECVZ);
}
case 6: {
if (conv) {
/* do next to last finishing step and then z = A*rhs */
if (cgpt) {
zbar = (*rhs1)/(*gbar);
(*bstep) = (*snprod)*zbar + (*bstep);
ynorm = sqrt((*ynorm2) + zbar*zbar);
for (i=0;i<num_cols;i++) {
x[i] += zbar*w[i];
}
}
(*bstep) = (*bstep) / (*beta1);
for (i=0;i<num_cols;i++) {
z[i] = rhs[i];
}
(*cur_phase) = 7;
return(SYMM_MSOLVEB);
} else {
/* orthogonalize z and then z = A*r2 */
s = -(*cg_beta)/(*obeta);
for (i=0;i<num_cols;i++) {
z[i] += s*resid[i];
}
(*cg_alpha) = BSpar_ip(num_cols,v,z,procinfo); CHKERRN(0);
s = -(*cg_alpha)/(*cg_beta);
for (i=0;i<num_cols;i++) {
z[i] += s*r2[i];
}
for (i=0;i<num_cols;i++) {
resid[i] = r2[i];
}
for (i=0;i<num_cols;i++) {
r2[i] = z[i];
}
(*cur_phase) = 5;
return(SYMM_MSOLVE2);
}
}
case 7: {
for (i=0;i<num_cols;i++) {
x[i] += (*bstep)*z[i];
}
return(SYMM_DONE);
}
default: {
return(SYMM_ERROR);
}
}
}
syntax highlighted by Code2HTML, v. 0.9.1