#include "BSprivate.h" /* a data structure used only in factoring */ /* nodes in a list of i-nodes that need to be received/sent during */ /* factorization */ typedef struct __list_type { int length; /* length of the columns in the i-node */ int cur_len; /* don't know anymore */ FLOAT *d_buffer; /* don't know anymore */ int *i_buffer; /* don't know anymore */ int buf_size; /* don't know anymore */ int cl_ind; /* index of the clique, if local */ int in_ind; /* index of the i-node if local */ } list_type; #include "BStree.h" extern void BSilu_updt_matrix1(BSpar_mat *,int,BStree,BStree,int,int *,FLOAT *, int, int *, FLOAT *, int , int , BScl_2_inode *, BSinode *, BSprocinfo *); /*+ BSilu_factor1 - Compute the incomplete LU factor of a matrix (no inode version) Input Parameters: . A - The sparse matrix to be factored . comm - the communication structure of the factoring . procinfo - the usual processor info Output Parameters: . A - The factored sparse matrix Returns: 0 if successful, otherwise a negative number whose absolute value is the row number of the color (less one) where the failure occured. +*/ typedef struct __col_list { int count; int *col; FLOAT **nz; } col_list; int BSilu_factor1(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo) { BMphase *to_phase, *from_phase; BMmsg *msg; int i, j, k; int cl_ind, in_ind; int count, size, ind; int length, gnum; int found; int dummy = 0; int *intptr, *intptr2, *i_buffer; FLOAT *d_buffer; BScl_2_inode *clique2inode; BSnumbering *color2clique; BSinode *inodes; int *data_ptr, data_len, msg_len; FLOAT *msg_buf; FLOAT *nzptr; FLOAT t; int info, fact_error; BStree recv_tree; BStree_ptr tree_node_ptr; BStree inode_tree; list_type *list_data_ptr; int *g_row_num, *iperm; int dd_size, *ii_buffer; FLOAT *dd_buffer; BStree rowtree; BStree_ptr node_ptr; col_list *col_list_ptr; BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0); BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0); color2clique = A->color2clique; clique2inode = A->clique2inode; inodes = A->inodes->list; g_row_num = A->global_row_num->numbers; iperm = A->inv_perm->perm; MY_INIT_TREE(inode_tree,(sizeof(int)*2)); for (i=0;ilength-1;i++) { for (cl_ind=color2clique->numbers[i]; cl_indnumbers[i+1];cl_ind++) { for (in_ind=clique2inode->inode_index[cl_ind]; in_indinode_index[cl_ind+1];in_ind++) { MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num), found,tree_node_ptr,dummy); data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr); data_ptr[0] = in_ind; data_ptr[1] = cl_ind; } } } /* Create a rowtree - a data structure that stores all the cols of all */ /* local rows (ILU) */ MY_INIT_TREE(rowtree,sizeof(col_list)); for (j=0; jinodes->length; j++) { for (i=0; icount++; } else { col_list_ptr->count = 1; /* A->max_local_row_length is used to store max_row_len */ MY_MALLOCN(col_list_ptr->col,(int *), sizeof(int)*A->max_local_row_length,1); MY_MALLOCN(col_list_ptr->nz,(FLOAT **), sizeof(FLOAT *)*A->max_local_row_length,1); } col_list_ptr->col[col_list_ptr->count-1]=inodes[j].gcol_num; col_list_ptr->nz[col_list_ptr->count-1]=&(inodes[j].nz[i]); } } fact_error = FALSE; /* now do this phase by phase */ for (i=0;ilength-1;i++) { to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0); from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0); /* send my part of each column in this color */ /* use the "from" part here */ msg = NULL; while ((msg = BMnext_msg(from_phase,msg)) != NULL) { CHKERRN(0); /* allocate space for the message */ data_ptr = BMget_user(msg,&data_len); CHKERRN(0); in_ind = data_ptr[1]; size = inodes[in_ind].length*sizeof(FLOAT) + inodes[in_ind].length*sizeof(int); MY_MALLOCN(msg_buf,(FLOAT *),size,1); BMset_msg_ptr(msg,msg_buf); CHKERRN(0); BMset_msg_size(msg,size); CHKERRN(0); count = 0; nzptr = inodes[in_ind].nz; for (k=0;knumbers[i]; cl_indnumbers[i+1];cl_ind++) { if (procinfo->my_id == clique2inode->proc[cl_ind]) { t = *(clique2inode->d_mats[cl_ind].matrix); if (t == 0.0) { fact_error = TRUE; printf("ILU Error: zero pivot encountered.\n"); } else { *(clique2inode->d_mats[cl_ind].matrix) = 1/t; } MLOG_flop(1); } } /* figure out what I am going to receive and make room for it */ /* use the "to" part here */ MY_INIT_TREE(recv_tree,sizeof(list_type)); msg = NULL; while ((msg = BMnext_msg(to_phase,msg)) != NULL) { CHKERRN(0); /* search list for this number */ data_ptr = BMget_user(msg,&data_len); CHKERRN(0); gnum = data_ptr[0]; length = data_ptr[1]; MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy); list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); if (found) { list_data_ptr->length += length; } else { list_data_ptr->length = length; list_data_ptr->cur_len = 0; list_data_ptr->d_buffer = NULL; list_data_ptr->i_buffer = NULL; list_data_ptr->cl_ind = -1; list_data_ptr->in_ind = -1; } } CHKERRN(0); /* add any of my parts to the list, if necessary */ for (cl_ind=color2clique->numbers[i]; cl_indnumbers[i+1];cl_ind++) { /* only work on columns that I own */ if (clique2inode->proc[cl_ind] != procinfo->my_id) continue; for (in_ind=clique2inode->inode_index[cl_ind]; in_indinode_index[cl_ind+1];in_ind++) { MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num), found,tree_node_ptr,dummy); list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); if (found) { list_data_ptr->cl_ind = cl_ind; list_data_ptr->in_ind = in_ind; list_data_ptr->length += inodes[in_ind].length; } else { list_data_ptr->length = inodes[in_ind].length; list_data_ptr->cur_len = 0; list_data_ptr->d_buffer = NULL; list_data_ptr->i_buffer = NULL; list_data_ptr->cl_ind = cl_ind; list_data_ptr->in_ind = in_ind; } } } /* make sure that no one has been received out of order */ if (procinfo->error_check) { MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->cl_ind == -1) { MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n"); } } } /* allocate space for each whole column */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); list_data_ptr->buf_size = (list_data_ptr->length*sizeof(FLOAT)) + (list_data_ptr->length*sizeof(int)); MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *), list_data_ptr->buf_size,2); list_data_ptr->i_buffer = (int *) &(list_data_ptr->d_buffer[list_data_ptr->length]); } /* put my part of the column into the collected column */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); in_ind = list_data_ptr->in_ind; length = inodes[in_ind].length; nzptr = inodes[in_ind].nz; d_buffer = list_data_ptr->d_buffer; for (k=0;klength; i_buffer = list_data_ptr->i_buffer; intptr = inodes[in_ind].row_num; /* while copying, convert i_buffer from new local to new global */ for (j=0;jcur_len += length; } /* collect all the parts of "my" columns */ while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) { CHKERRN(0); BMcheck_on_async_block(from_phase); CHKERRN(0); msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0); BMset_msg_ptr(msg,NULL); CHKERRN(0); data_ptr = BMget_user(msg,&msg_len); CHKERRN(0); gnum = data_ptr[0]; length = data_ptr[1]; MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr); if (found) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]); for (k=0;klength; i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]); intptr = (int *) &(msg_buf[length]); for (j=0;jcur_len += length; } else { MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n"); } } CHKERRN(0); BMfree_block_msg(to_phase); CHKERRN(0); BMfinish_on_async_block(from_phase); CHKERRN(0); /* see if the factorizations went okay, if not, then return */ /* we have to wait until here to make sure all messages are received */ /* make anything that needs free'ing is free'ed */ /* everyone agrees on success/error */ GISUM(&fact_error,1,&info,procinfo->procset); if (fact_error != 0) { /* free up space and return with the negative phase number */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); MY_FREE(list_data_ptr->d_buffer); } MY_FREE_TREE(recv_tree); return(-(i+1)); } /* factor these columns */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->length <= 0) continue; cl_ind = list_data_ptr->cl_ind; t = *(clique2inode->d_mats[cl_ind].matrix); /* The following part is modified (ILU) */ for (j=0;jlength;j++) { if (list_data_ptr->i_buffer[j] > clique2inode->g_offset[cl_ind]) (list_data_ptr->d_buffer[j]) *= t; } MLOG_flop(list_data_ptr->length); } /* The following part is modified (ILU) */ /* send these columns out */ msg = NULL; while ((msg = BMnext_msg(to_phase,msg)) != NULL) { CHKERRN(0); data_ptr = BMget_user(msg,&data_len); CHKERRN(0); gnum = data_ptr[0]; MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr); if (found) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_SEARCH_TREE(rowtree,gnum,found,node_ptr); if (found) { col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr); dd_size = col_list_ptr->count*sizeof(FLOAT)+ list_data_ptr->buf_size; MY_MALLOCN(dd_buffer,(FLOAT *),dd_size,1); for (j=0; jlength; j++) dd_buffer[j] = list_data_ptr->d_buffer[j]; k = j; for (j=0; jcount; j++) dd_buffer[k+j] = col_list_ptr->nz[j][0]; ii_buffer = (int *) &(dd_buffer[k+j]); for (j=0; jlength; j++) ii_buffer[j] = list_data_ptr->i_buffer[j]; BMset_msg_ptr(msg,dd_buffer); CHKERRN(0); BMset_msg_size(msg,dd_size); CHKERRN(0); BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0); BMset_msg_ptr(msg,NULL); CHKERRN(0); MY_FREE(dd_buffer); } else { printf("Error: Something wrong with rowtree.\n"); } } else { MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n"); } } CHKERRN(0); BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0); /* retrieve my part of the column and put it in the data structure */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->length <= 0) continue; in_ind = list_data_ptr->in_ind; length = inodes[in_ind].length; nzptr = inodes[in_ind].nz; d_buffer = list_data_ptr->d_buffer; for (k=0;klength; } /* now update the remaining matrix using the local columns */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->length <= 0) continue; in_ind = list_data_ptr->in_ind; gnum = inodes[in_ind].gcol_num; MY_SEARCH_TREE(rowtree,gnum,found,node_ptr); if (found) { col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr); MY_MALLOCN(dd_buffer,(FLOAT *), col_list_ptr->count*sizeof(FLOAT),1); for (j=0; jcount; j++) dd_buffer[j] = col_list_ptr->nz[j][0]; BSilu_updt_matrix1(A,gnum,rowtree, inode_tree,list_data_ptr->length, list_data_ptr->i_buffer,list_data_ptr->d_buffer, col_list_ptr->count,col_list_ptr->col,dd_buffer, color2clique->numbers[i+1], color2clique->numbers[color2clique->length-1],clique2inode, inodes,procinfo); CHKERRN(0); MY_FREE(dd_buffer); } else { printf("Error: Something wrong with rowtree.\n"); } } /* The following part is modified (ILU) */ /* receive the incoming columns and update the remaining matrix */ /* strip out the "local" part and put it in the matrix */ while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) { CHKERRN(0); BMcheck_on_async_block(to_phase); CHKERRN(0); msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0); BMset_msg_ptr(msg,NULL); CHKERRN(0); data_ptr = BMget_user(msg,&msg_len); CHKERRN(0); cl_ind = data_ptr[0]; in_ind = data_ptr[1]; /* let gnum be first global row location of my part */ gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]]; length = inodes[in_ind].length; msg_len = BMget_msg_size(msg); CHKERRN(0); msg_len /= 2*sizeof(FLOAT)+sizeof(int); /* find my local part */ i_buffer = (int *) &(msg_buf[2*msg_len]); for (ind=0; indnumbers[i+1], color2clique->numbers[color2clique->length-1],clique2inode, inodes,procinfo); CHKERRN(0); } CHKERRN(0); BMfree_block_msg(from_phase); CHKERRN(0); BMfinish_on_async_block(to_phase); CHKERRN(0); /* now free up the list */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); MY_FREE(list_data_ptr->d_buffer); } MY_FREE_TREE(recv_tree); } /* now free up the col list */ MY_FIRST_IN_TREE(rowtree,node_ptr); while (! IS_TREE_PTR_NULL(node_ptr)) { col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr); MY_NEXT_IN_TREE(node_ptr); MY_FREE(col_list_ptr->col); MY_FREE(col_list_ptr->nz); } MY_FREE_TREE(rowtree); MY_FREE_TREE(inode_tree); BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0); BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0); return(0); } /*+ BSilu_updt_matrix1 - Update the matrix using a bunch of i-nodes Input Parameters: A - The sparse matrix to be factored colnum - the global column number rowtree - a data structure that stores the local rows inode_tree - a data structure that stores the clique index and inode index for each node len - the length of the factored col for elimination updt_index - the indices of the factored colnum updt_inode - the valuess of the factored colnum len2 - the length of the factored row my_inode_ind - the indices of the factored row my_inode - the values of the factores row start - dummy end - dummy Output Parameters: I don't really know what these things do now. Returns: void Notes: This code is very complicated and could be done much better. I keep most part of the code so that change is minimized. It is not necessary though. (ILU) +*/ void BSilu_updt_matrix1(BSpar_mat *A, int colnum, BStree rowtree, BStree inode_tree, int len, int *updt_index, FLOAT *updt_inode, int len2, int *my_inode_ind, FLOAT *my_inode, int start, int end, BScl_2_inode *clique2inode, BSinode *inodes, BSprocinfo *procinfo) { int i, j; int updt_count, offset, toffset; int cl_ind, in_ind; BSpermutation *iperm; FLOAT *work, *matrix; FLOAT work2; int *intptr; FLOAT *nzptr; int tcount, count2, gnum, size; int found; BStree_ptr tree_node_ptr; int *tree_data_ptr; int *rowptr, *row_gnum, *g_iperm; BStree_ptr node_ptr; col_list *col_list_ptr; /* sort the updt_inode */ iperm = BSalloc_permutation(len); CHKERR(0); for (i=0;iperm[i] = i; BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0); MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1); BSiperm_dvec(updt_inode,work,iperm); CHKERR(0); for (j=0;jglobal_row_num->numbers; g_iperm = A->inv_perm->perm; for (updt_count=0;updt_countg_offset[in_ind] > colnum) { /********** update the inode */ tcount = 0; count2 = 0; MY_SEARCH_TREE(rowtree,updt_index[updt_count],found,node_ptr); if (found) { col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr); intptr = col_list_ptr->col; while ((tcountcount)) { if ((my_inode_ind[tcount]==intptr[count2]) & (intptr[count2] > colnum)) { /* A is assumed to be structurally symmetric so that the */ /* following tricky searching is ok. (ILU) */ MY_SEARCH_TREE(inode_tree,intptr[count2],found,tree_node_ptr); tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr); i = tree_data_ptr[0]; nzptr = inodes[i].nz; rowptr = inodes[i].row_num; /* search the "colnum"th row (ILU) for (j1=0; j1g_offset[cl_ind]; offset = updt_index[updt_count]-gnum; matrix = clique2inode->d_mats[cl_ind].matrix; size = clique2inode->d_mats[cl_ind].size; tcount = updt_count; /* work2 is modified (ILU) */ for (j=0; j