#include "BSprivate.h"
/*+ BSpar_bcg - conjugate gradient on a block of vectors; reverse communication
Input Parameters:
. num_cols - the length of the vector on this processor
. rhs - the contiguous block of rhs vectors on this processor
length = num_cols * nBS
. x - the contiguous block of solution vectors on this processor
length = num_cols * nBS
. resid - the contiguous block of residual vectors on this processor
length = num_cols * nBS
. z - the contiguous block of work vectors on this processor
length = num_cols * nBS
. p - the contiguous block of work vectors on this processor
length = num_cols * nBS
. cg_beta - the vector of cg coefficients
length = nBS
. cg_alpha - the vector of cg coefficients
length = nBS
. cur_step - the current cg step
. cur_phase - the current cg phase
. nBS - the number of columns in the block
. procinfo - the usual processor stuff
Output Parameters:
. many of the input parameters are changed, it is just the usual
CG algorithm
Returns:
The action of the calling program depends on the return value
CG_MATVECR: multiply A*x and put the results in resid
CG_MATVECZ: multiply A*p and put the results in z
CG_MSOLVE: multiply A(-1)*resid and put the results in z
CG_ERROR: Error during CG, indefinite matrix encountered
Notes:
This is NOT the block conjugate gradient algorithm. It is
a separate conjugate gradient run on each vector, but the
operations are blocked for efficiency.
The first time that BSpar_bcg is called, cur_step and
cur_phase should be 0.
+*/
int BSpar_bcg(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
FLOAT *p, FLOAT *cg_beta, FLOAT *cg_alpha, int *cur_step, int *cur_phase,
int nBS, BSprocinfo *procinfo)
{
int i, j;
FLOAT bk, *bknum;
FLOAT ak;
FLOAT *t_z, *t_p, *t_resid, *t_x;
switch((*cur_phase)) {
case 0: {
(*cur_phase) = 1;
return(CG_MATVECR);
}
case 1: {
for (i=0;i<num_cols*nBS;i++) {
resid[i] = rhs[i] - resid[i];
}
(*cur_phase) = 2;
(*cur_step) = 1;
return(CG_MSOLVE);
}
case 2: {
MY_MALLOCN(bknum,(FLOAT *),sizeof(FLOAT)*nBS,1);
BSpar_bip(num_cols,z,resid,nBS,bknum,procinfo); CHKERRN(0);
for (j=0;j<nBS;j++) {
t_z = &(z[j*num_cols]);
t_p = &(p[j*num_cols]);
if (bknum[j] < 0.0) return(CG_ERROR);
if ((*cur_step) == 1) {
for (i=0;i<num_cols;i++) {
t_p[i] = t_z[i];
}
} else {
bk = bknum[j]/cg_beta[j];
for (i=0;i<num_cols;i++) {
t_p[i] = t_z[i] + bk*t_p[i];
}
}
cg_beta[j] = bknum[j];
}
MY_FREE(bknum);
(*cur_phase) = 3;
return(CG_MATVECZ);
}
case 3: {
BSpar_bip(num_cols,p,z,nBS,cg_alpha,procinfo); CHKERRN(0);
for (j=0;j<nBS;j++) {
t_z = &(z[j*num_cols]);
t_resid = &(resid[j*num_cols]);
t_p = &(p[j*num_cols]);
t_x = &(x[j*num_cols]);
if (cg_alpha[j] < 0.0) return(CG_ERROR);
ak = cg_beta[j]/cg_alpha[j];
for (i=0;i<num_cols;i++) {
t_x[i] += ak*t_p[i];
t_resid[i] -= ak*t_z[i];
}
}
(*cur_phase) = 2;
(*cur_step)++;
return(CG_MSOLVE);
}
default: {
return(CG_ERROR);
}
}
}
syntax highlighted by Code2HTML, v. 0.9.1