#include "BSprivate.h" /*@ BSback_solve - Backward triangular matrix solution on a single vector Input Parameters: . A - The sparse matrix . x - The rhs . comm - The communication structure for A . procinfo - the usual processor information Output Parameters: . x - on exit contains the solution vector Returns: void @*/ void BSback_solve(BSpar_mat *A, FLOAT *x, BScomm *comm, BSprocinfo *procinfo) { BMcomp_msg *from_msg, *to_msg; BMphase *to_phase, *from_phase; BMmsg *msg; int i, j, k; int cl_ind, in_ind; int count, size, ind, num_cols; int *row; FLOAT *nz; BScl_2_inode *clique2inode = A->clique2inode; BSnumbering *color2clique = A->color2clique; BSinode *inodes = A->inodes->list; int *in_index = clique2inode->inode_index; int *proc = clique2inode->proc; BSdense *d_mats = clique2inode->d_mats; int *data_ptr, msg_len; FLOAT *msg_buf, *matrix; int my_id = procinfo->my_id; FLOAT *work; char UP = 'U'; char TR = 'T'; char NTR = 'N'; char ND = 'N'; int *col2cl = color2clique->numbers; int length = color2clique->length; int start, finish, symmetric; int ione = 1; FLOAT one = 1.0; FLOAT zero = 0.0; FLOAT minus_one = -1.0; FLOAT DDOT(); int *gnum = A->global_row_num->numbers; int *iperm = A->inv_perm->perm; FLOAT time0, gmev_time, trmv_time, other_time; gmev_time = 0.0; trmv_time = 0.0; other_time = 0.0; time0 = MPI_Wtime(); /* Is the symmetric data structure used? */ symmetric = A->icc_storage; if(symmetric) { from_msg = comm->to_msg; /* we do mean to switch these */ to_msg = comm->from_msg; } else { from_msg = comm->from_msg; /* do not switch for ILU case */ to_msg = comm->to_msg; } /* get some work space */ MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*A->num_rows,1); /* post for all messages */ BMinit_comp_msg(from_msg,procinfo); CHKERR(0); /* now do this phase by phase */ for (i=length-2;i>=0;i--) { start = col2cl[i]; finish = col2cl[i+1]; if(!symmetric) { /* invert the diagonals and find the answers */ for (cl_ind=start;cl_indd_mats[cl_ind].size; ind = clique2inode->d_mats[cl_ind].local_ind; matrix = clique2inode->d_mats[cl_ind].matrix; /* can't do much better (likely) on this DGEMV */ DGEMV(&NTR,&size,&size,&one,matrix,&size,&(x[ind]),&ione,&zero, work,&ione); for (k=0; k 0) { other_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); #ifdef MY_BLAS_DGEMV_ON if (num_cols > DGEMV_UNROLL_LVL) { for (k=0;k 0) { other_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); #ifdef MY_BLAS_DGEMV_ON if (num_cols > DGEMV_UNROLL_LVL) { for (k=0;k=0; j--) { if (gnum[iperm[row[j]]] > inodes[in_ind].gcol_num) size--; else break; } if (size > 0) { other_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); #ifdef MY_BLAS_DGEMV_ON if (num_cols > DGEMV_UNROLL_LVL) { DGEMV(&NTR,&size,&num_cols,&one,nz,&length,&(x[ind]), &ione,&zero,work,&ione); for (k=0;kinode_index[cl_ind]; in_indinode_index[cl_ind+1]; in_ind++) { row = inodes[in_ind].row_num; nz = inodes[in_ind].nz; size = inodes[in_ind].length; length = inodes[in_ind].length; num_cols = inodes[in_ind].num_cols; for (j=length-1; j>=0; j--) { if (gnum[iperm[row[j]]] > inodes[in_ind].gcol_num) size--; else break; } if (size > 0) { other_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); #ifdef MY_BLAS_DGEMV_ON if (num_cols > DGEMV_UNROLL_LVL) { DGEMV(&NTR,&size,&num_cols,&one,nz,&length,&(msg_buf[count]), &ione,&zero,work,&ione); for (k=0;kd_mats[cl_ind].size; other_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); #ifdef MY_BLAS_DTRMV_ON MY_DTRMV_N_U(size,d_mats[cl_ind].matrix,size, &(x[d_mats[cl_ind].local_ind]),work); #else DTRMV(&UP,&NTR,&ND,&size,d_mats[cl_ind].matrix,&size, &(x[d_mats[cl_ind].local_ind]),&ione); #endif trmv_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); } } } } MY_FREE(work); /* wait for all of the sent messages to finish */ BMfinish_comp_msg(to_msg,procinfo); CHKERR(0); MLOG_flop((2*A->local_nnz)); other_time += MPI_Wtime() - time0; time0 = MPI_Wtime(); printf("BSb_solve: other = %e, gmev = %e, trmv = %e\n", other_time,gmev_time,trmv_time); }