#include "BSprivate.h"
/*@ BSback_solve1 - Backward triangular matrix solution on a
single vector
Input Parameters:
. A - The sparse matrix
. x - The rhs
. comm - The communication structure for A
. procinfo - the usual processor information
Output Parameters:
. x - on exit contains the solution vector
Returns:
void
Notes:
We assume that A has no i-nodes or cliques
@*/
void BSback_solve1(BSpar_mat *A, FLOAT *x, BScomm *comm, BSprocinfo *procinfo)
{
BMcomp_msg *from_msg, *to_msg;
BMphase *to_phase, *from_phase;
BMmsg *msg;
int i, j, k;
int cl_ind, in_ind;
int count, size, ind;
int *row;
FLOAT *nz;
BScl_2_inode *clique2inode = A->clique2inode;
BSnumbering *color2clique = A->color2clique;
BSinode *inodes = A->inodes->list;
int *in_index = clique2inode->inode_index;
int *proc = clique2inode->proc;
BSdense *d_mats = clique2inode->d_mats;
int *data_ptr, msg_len;
FLOAT *msg_buf;
int my_id = procinfo->my_id;
int *col2cl = color2clique->numbers;
int length = color2clique->length;
int start, finish, symmetric;
FLOAT t;
int *gnum = A->global_row_num->numbers;
int *iperm = A->inv_perm->perm;
/* Is the symmetric data structure used? */
symmetric = A->icc_storage;
/*
if(!symmetric) {
BSILUback_solve1(A,x,comm,procinfo);
return;
}
*/
if(symmetric) {
from_msg = comm->to_msg; /* we do mean to switch these */
to_msg = comm->from_msg;
} else {
from_msg = comm->from_msg; /* do not switch for ILU case */
to_msg = comm->to_msg;
}
/* post for all messages */
BMinit_comp_msg(from_msg,procinfo); CHKERR(0);
/* now do this phase by phase */
for (i=length-2;i>=0;i--) {
start = col2cl[i];
finish = col2cl[i+1];
if(!symmetric) {
/* invert the diagonals and find the answers */
for (cl_ind=start;cl_ind<finish;cl_ind++) {
if (my_id == proc[cl_ind]) {
/* first, multiply the clique */
/* only do the strictly upper triangular part */
x[d_mats[cl_ind].local_ind] *= *(d_mats[cl_ind].matrix);
}
}
}
/* first send my messages */
/* this will involve computing partial sums */
to_phase = BMget_phase(to_msg,i); CHKERR(0);
msg = NULL;
while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
CHKERR(0);
msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
if(symmetric) {
count = 0;
for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) {
in_ind=in_index[cl_ind];
size = inodes[in_ind].length;
if (size > 0) {
row = inodes[in_ind].row_num;
nz = inodes[in_ind].nz;
t = 0.0;
for (k=0;k<size;k++) t += nz[k]*x[row[k]];
msg_buf[count] = t;
}
count++;
}
} else {
for (j=0; j<msg_len; j++)
msg_buf[j] = x[data_ptr[j]];
}
BMsendf_msg(msg,procinfo); CHKERR(0);
}
CHKERR(0);
/* do some local work, multiply by the i-nodes */
for (cl_ind=start;cl_ind<finish;cl_ind++) {
if (my_id == proc[cl_ind]) {
in_ind=in_index[cl_ind];
size = inodes[in_ind].length;
if (size > 0) {
ind = d_mats[cl_ind].local_ind;
row = inodes[in_ind].row_num;
nz = inodes[in_ind].nz;
if(symmetric) {
t = 0.0;
for (k=0;k<size;k++) t += nz[k]*x[row[k]];
x[ind] -= t;
} else {
t = x[ind];
for (k=0;k<size;k++) {
if (gnum[iperm[row[k]]] < inodes[in_ind].gcol_num) {
x[row[k]] -= t*nz[k];
}
}
}
}
}
}
/* receive my messages and update my rhs */
from_phase = BMget_phase(from_msg,i); CHKERR(0);
while ((msg = BMrecv_msg(from_phase)) != NULL) {
CHKERR(0);
msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0);
data_ptr = BMget_user(msg,&msg_len); CHKERR(0);
if(symmetric) {
msg_len = BMget_msg_size(msg); CHKERR(0);
for (j=0;j<msg_len;j++) x[data_ptr[j]] -= msg_buf[j];
} else {
count = 0;
for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) {
in_ind=in_index[cl_ind];
size = inodes[in_ind].length;
if (size > 0) {
row = inodes[in_ind].row_num;
nz = inodes[in_ind].nz;
t = msg_buf[count];
for (k=0;k<size;k++) {
if (gnum[iperm[row[k]]] < inodes[in_ind].gcol_num) {
x[row[k]] -= t*nz[k];
}
}
}
count++;
}
}
BMfree_msg(msg); CHKERR(0);
}
CHKERR(0);
if(symmetric) {
/* invert the diagonals and find the answers */
for (cl_ind=start;cl_ind<finish;cl_ind++) {
if (my_id == proc[cl_ind]) {
/* first, multiply the clique */
/* only do the strictly upper triangular part */
/* we ASSUME the diagonal is all 1's */
x[d_mats[cl_ind].local_ind] *= *(d_mats[cl_ind].matrix);
}
}
}
}
/* wait for all of the sent messages to finish */
BMfinish_comp_msg(to_msg,procinfo); CHKERR(0);
MLOG_flop((2*A->local_nnz));
}
syntax highlighted by Code2HTML, v. 0.9.1