#include "BSprivate.h"

/*+ BSpar_bcg - conjugate gradient on a block of vectors; reverse communication

    Input Parameters:
.   num_cols - the length of the vector on this processor
.   rhs - the contiguous block of rhs vectors on this processor
          length = num_cols * nBS
.   x - the contiguous block of solution vectors on this processor
          length = num_cols * nBS
.   resid - the contiguous block of residual vectors on this processor
          length = num_cols * nBS
.   z - the contiguous block of work vectors on this processor
          length = num_cols * nBS
.   p - the contiguous block of work vectors on this processor
          length = num_cols * nBS
.   cg_beta - the vector of cg coefficients
          length = nBS
.   cg_alpha - the vector of cg coefficients
          length = nBS
.   cur_step - the current cg step
.   cur_phase - the current cg phase
.   nBS - the number of columns in the block
.   procinfo - the usual processor stuff

    Output Parameters:
.   many of the input parameters are changed, it is just the usual
    CG algorithm

    Returns:
    The action of the calling program depends on the return value
    CG_MATVECR: multiply A*x and put the results in resid
    CG_MATVECZ: multiply A*p and put the results in z
    CG_MSOLVE: multiply A(-1)*resid and put the results in z
    CG_ERROR: Error during CG, indefinite matrix encountered

    Notes:
    This is NOT the block conjugate gradient algorithm.  It is
    a separate conjugate gradient run on each vector, but the
    operations are blocked for efficiency.

    The first time that BSpar_bcg is called, cur_step and
    cur_phase should be 0.

 +*/
int BSpar_bcg(int num_cols, FLOAT *rhs, FLOAT *x, FLOAT *resid, FLOAT *z,
	FLOAT *p, FLOAT *cg_beta, FLOAT *cg_alpha, int *cur_step, int *cur_phase,
	int nBS, BSprocinfo *procinfo) 
{
	int	i, j;
	FLOAT	bk, *bknum;
	FLOAT	ak;
	FLOAT	*t_z, *t_p, *t_resid, *t_x;

	switch((*cur_phase)) {
		case 0: {
			(*cur_phase) = 1;
			return(CG_MATVECR);
		}
		case 1: {
			for (i=0;i<num_cols*nBS;i++) {
				resid[i] = rhs[i] - resid[i];
			}
			(*cur_phase) = 2;
			(*cur_step) = 1;
			return(CG_MSOLVE);
		}
		case 2: {
			MY_MALLOCN(bknum,(FLOAT *),sizeof(FLOAT)*nBS,1);
			BSpar_bip(num_cols,z,resid,nBS,bknum,procinfo); CHKERRN(0);
			for (j=0;j<nBS;j++) {
				t_z = &(z[j*num_cols]);
				t_p = &(p[j*num_cols]);
				if (bknum[j] < 0.0) return(CG_ERROR);
				if ((*cur_step) == 1) {
					for (i=0;i<num_cols;i++) {
						t_p[i] = t_z[i];
					}
				} else {
					bk = bknum[j]/cg_beta[j];
					for (i=0;i<num_cols;i++) {
						t_p[i] = t_z[i] + bk*t_p[i];
					}
				}
				cg_beta[j] = bknum[j];
			}
			MY_FREE(bknum);
			(*cur_phase) = 3;
			return(CG_MATVECZ);
		}
		case 3: {
			BSpar_bip(num_cols,p,z,nBS,cg_alpha,procinfo); CHKERRN(0);
			for (j=0;j<nBS;j++) {
				t_z = &(z[j*num_cols]);
				t_resid = &(resid[j*num_cols]);
				t_p = &(p[j*num_cols]);
				t_x = &(x[j*num_cols]);
				if (cg_alpha[j] < 0.0) return(CG_ERROR);
				ak = cg_beta[j]/cg_alpha[j];
				for (i=0;i<num_cols;i++) {
					t_x[i] += ak*t_p[i];
					t_resid[i] -= ak*t_z[i];
				}
			}
			(*cur_phase) = 2;
			(*cur_step)++;
			return(CG_MSOLVE);
		}
		default: {
			return(CG_ERROR);
		}
	}
}


syntax highlighted by Code2HTML, v. 0.9.1