#include "BSprivate.h" /*@ BSb_for_solve - Forward triangular matrix solution on a block of vectors Input Parameters: . A - The sparse matrix . x - The contiguous block of rhs's . comm - The communication structure for A . block_size - the number of rhs's . procinfo - the usual processor information Output Parameters: . b - on exit these vectors contain the solution vector Returns: void @*/ void BSb_for_solve(BSpar_mat *A, FLOAT *x, BScomm *comm, int block_size, BSprocinfo *procinfo) { BMphase *to_phase, *from_phase; BMmsg *msg; int i, j, k, n; int cl_ind, in_ind; int count, size, ind, num_cols; int *row; FLOAT *nz; BScl_2_inode *clique2inode; BSnumbering *color2clique; BSinode *inodes; int *data_ptr, msg_len; FLOAT *msg_buf, *matrix; FLOAT *work; FLOAT *xptr, *wptr; FLOAT **xoff; char UP = 'U'; char TR = 'T'; char NTR = 'N'; char SIDE = 'L'; char ND = 'N'; FLOAT one = 1.0; FLOAT zero = 0.0; if((!A->icc_storage)||(procinfo->single)) { /* No ILU version or single version so call BSfor_solve block_size times */ n = A->num_rows; for (i=0;isingle) { BSfor_solve1(A,&(x[n*i]),comm,procinfo); CHKERR(0); } else { BSfor_solve(A,&(x[n*i]),comm,procinfo); CHKERR(0); } } return; } color2clique = A->color2clique; clique2inode = A->clique2inode; inodes = A->inodes->list; /* get some work space */ MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*A->num_rows*block_size,1); /* post for all messages */ BMinit_comp_msg(comm->from_msg,procinfo); CHKERR(0); /* calculate x offsets */ MY_MALLOC(xoff,(FLOAT **),sizeof(FLOAT *)*block_size,1); for (i=0;inum_rows]); } /* now do this phase by phase */ for (i=0;ilength-1;i++) { /* find my portion of the solution using the cliques on the diagonal */ for (cl_ind=color2clique->numbers[i]; cl_indnumbers[i+1];cl_ind++) { if (procinfo->my_id == clique2inode->proc[cl_ind]) { /* first, multiply the clique */ /* the clique is stored, inverted, in the upper triangle */ size = clique2inode->d_mats[cl_ind].size; ind = clique2inode->d_mats[cl_ind].local_ind; matrix = clique2inode->d_mats[cl_ind].matrix; DTRMM(&SIDE,&UP,&TR,&ND,&size,&block_size,&one,matrix, &size,&(x[ind]),&(A->num_rows)); } } /* now send my messages */ to_phase = BMget_phase(comm->to_msg,i); CHKERR(0); msg = NULL; while ((msg = BMnext_msg(to_phase,msg)) != NULL) { CHKERR(0); msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0); data_ptr = BMget_user(msg,&msg_len); CHKERR(0); for (j=0;jnumbers[i]; cl_indnumbers[i+1];cl_ind++) { if (procinfo->my_id == clique2inode->proc[cl_ind]) { ind = clique2inode->d_mats[cl_ind].local_ind; /* multiply the inodes */ for (in_ind=clique2inode->inode_index[cl_ind]; in_indinode_index[cl_ind+1];in_ind++) { row = inodes[in_ind].row_num; nz = inodes[in_ind].nz; size = inodes[in_ind].length; num_cols = inodes[in_ind].num_cols; if (size > 0) { DGEMM(&NTR,&NTR,&size,&block_size,&num_cols,&one,nz, &size,&(x[ind]),&(A->num_rows),&zero,work,&size); for (j=0;jfrom_msg,i); CHKERR(0); while ((msg = BMrecv_msg(from_phase)) != NULL) { CHKERR(0); msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERR(0); data_ptr = BMget_user(msg,&msg_len); CHKERR(0); msg_len = BMget_msg_size(msg); CHKERR(0); msg_len /= block_size; count = 0; for (cl_ind=data_ptr[0];cl_ind<=data_ptr[1];cl_ind++) { for (in_ind=clique2inode->inode_index[cl_ind]; in_indinode_index[cl_ind+1];in_ind++) { row = inodes[in_ind].row_num; nz = inodes[in_ind].nz; size = inodes[in_ind].length; num_cols = inodes[in_ind].num_cols; if (size > 0) { DGEMM(&NTR,&NTR,&size,&block_size,&num_cols,&one,nz, &size,&(msg_buf[count]),&msg_len,&zero,work, &size); for (j=0;jto_msg,procinfo); CHKERR(0); MLOG_flop((2*A->local_nnz)); }