#include "BSprivate.h" /* a data structure used only in factoring */ /* nodes in a list of i-nodes that need to be received/sent during */ /* factorization */ typedef struct __list_type { int length; /* length of the columns in the i-node */ int cur_len; /* don't know anymore */ FLOAT *d_buffer; /* don't know anymore */ int *i_buffer; /* don't know anymore */ int buf_size; /* don't know anymore */ int cl_ind; /* index of the clique, if local */ int in_ind; /* index of the i-node if local */ } list_type; #include "BStree.h" extern void BSupdt_matrix1(BStree ,int ,int *, FLOAT *,int ,int *,FLOAT *, int ,int ,BScl_2_inode *,BSinode *, BSprocinfo *); /*+ BSfactor1 - Compute the incomplete factor of a matrix (no inode version) Input Parameters: . A - The sparse matrix to be factored . comm - the communication structure of the factoring . procinfo - the usual processor info Output Parameters: . A - The factored sparse matrix Returns: 0 if successful, otherwise a negative number whose absolute value is the row number of the color (less one) where the failure occured. +*/ int BSfactor1(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo) { BMphase *to_phase, *from_phase; BMmsg *msg; int i, j, k; int cl_ind, in_ind; int count, size, ind; int length, gnum; int found; int dummy = 0; int *intptr, *intptr2, *i_buffer; FLOAT *d_buffer; BScl_2_inode *clique2inode; BSnumbering *color2clique; BSinode *inodes; int *data_ptr, data_len, msg_len; FLOAT *msg_buf; FLOAT *nzptr; FLOAT t; int info, fact_error; BStree recv_tree; BStree_ptr tree_node_ptr; BStree inode_tree; list_type *list_data_ptr; int *g_row_num, *iperm; BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0); BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0); color2clique = A->color2clique; clique2inode = A->clique2inode; inodes = A->inodes->list; g_row_num = A->global_row_num->numbers; iperm = A->inv_perm->perm; MY_INIT_TREE(inode_tree,(sizeof(int)*2)); for (i=0;ilength-1;i++) { for (cl_ind=color2clique->numbers[i]; cl_indnumbers[i+1];cl_ind++) { for (in_ind=clique2inode->inode_index[cl_ind]; in_indinode_index[cl_ind+1];in_ind++) { MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num), found,tree_node_ptr,dummy); data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr); data_ptr[0] = in_ind; data_ptr[1] = cl_ind; } } } fact_error = FALSE; /* now do this phase by phase */ for (i=0;ilength-1;i++) { to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0); from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0); /* send my part of each column in this color */ /* use the "from" part here */ msg = NULL; while ((msg = BMnext_msg(from_phase,msg)) != NULL) { CHKERRN(0); /* allocate space for the message */ data_ptr = BMget_user(msg,&data_len); CHKERRN(0); in_ind = data_ptr[1]; size = inodes[in_ind].length*sizeof(FLOAT) + inodes[in_ind].length*sizeof(int); MY_MALLOCN(msg_buf,(FLOAT *),size,1); BMset_msg_ptr(msg,msg_buf); CHKERRN(0); BMset_msg_size(msg,size); CHKERRN(0); count = 0; nzptr = inodes[in_ind].nz; for (k=0;knumbers[i]; cl_indnumbers[i+1];cl_ind++) { if (procinfo->my_id == clique2inode->proc[cl_ind]) { t = *(clique2inode->d_mats[cl_ind].matrix); if (t <= 0.0) { fact_error = TRUE; } else { *(clique2inode->d_mats[cl_ind].matrix) = 1/sqrt(t); } MLOG_flop(1); } } /* figure out what I am going to receive and make room for it */ /* use the "to" part here */ MY_INIT_TREE(recv_tree,sizeof(list_type)); msg = NULL; while ((msg = BMnext_msg(to_phase,msg)) != NULL) { CHKERRN(0); /* search list for this number */ data_ptr = BMget_user(msg,&data_len); CHKERRN(0); gnum = data_ptr[0]; length = data_ptr[1]; MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy); list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); if (found) { list_data_ptr->length += length; } else { list_data_ptr->length = length; list_data_ptr->cur_len = 0; list_data_ptr->d_buffer = NULL; list_data_ptr->i_buffer = NULL; list_data_ptr->cl_ind = -1; list_data_ptr->in_ind = -1; } } CHKERRN(0); /* add any of my parts to the list, if necessary */ for (cl_ind=color2clique->numbers[i]; cl_indnumbers[i+1];cl_ind++) { /* only work on columns that I own */ if (clique2inode->proc[cl_ind] != procinfo->my_id) continue; for (in_ind=clique2inode->inode_index[cl_ind]; in_indinode_index[cl_ind+1];in_ind++) { MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num), found,tree_node_ptr,dummy); list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); if (found) { list_data_ptr->cl_ind = cl_ind; list_data_ptr->in_ind = in_ind; list_data_ptr->length += inodes[in_ind].length; } else { list_data_ptr->length = inodes[in_ind].length; list_data_ptr->cur_len = 0; list_data_ptr->d_buffer = NULL; list_data_ptr->i_buffer = NULL; list_data_ptr->cl_ind = cl_ind; list_data_ptr->in_ind = in_ind; } } } /* make sure that no one has been received out of order */ if (procinfo->error_check) { MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->cl_ind == -1) { MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n"); } } } /* allocate space for each whole column */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); list_data_ptr->buf_size = (list_data_ptr->length*sizeof(FLOAT)) + (list_data_ptr->length*sizeof(int)); MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *), list_data_ptr->buf_size,2); list_data_ptr->i_buffer = (int *) &(list_data_ptr->d_buffer[list_data_ptr->length]); } /* put my part of the column into the collected column */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); in_ind = list_data_ptr->in_ind; length = inodes[in_ind].length; nzptr = inodes[in_ind].nz; d_buffer = list_data_ptr->d_buffer; for (k=0;klength; i_buffer = list_data_ptr->i_buffer; intptr = inodes[in_ind].row_num; /* while copying, convert i_buffer from new local to new global */ for (j=0;jcur_len += length; } /* collect all the parts of "my" columns */ while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) { CHKERRN(0); BMcheck_on_async_block(from_phase); CHKERRN(0); msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0); BMset_msg_ptr(msg,NULL); CHKERRN(0); data_ptr = BMget_user(msg,&msg_len); CHKERRN(0); gnum = data_ptr[0]; length = data_ptr[1]; MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr); if (found) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]); for (k=0;klength; i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]); intptr = (int *) &(msg_buf[length]); for (j=0;jcur_len += length; } else { MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n"); } } CHKERRN(0); BMfree_block_msg(to_phase); CHKERRN(0); BMfinish_on_async_block(from_phase); CHKERRN(0); /* see if the factorizations went okay, if not, then return */ /* we have to wait until here to make sure all messages are received */ /* make anything that needs free'ing is free'ed */ /* everyone agrees on success/error */ GISUM(&fact_error,1,&info,procinfo->procset); if (fact_error != 0) { /* free up space and return with the negative phase number */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); MY_FREE(list_data_ptr->d_buffer); } MY_FREE_TREE(recv_tree); return(-(i+1)); } /* factor these columns */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->length <= 0) continue; cl_ind = list_data_ptr->cl_ind; t = *(clique2inode->d_mats[cl_ind].matrix); for (j=0;jlength;j++) { (list_data_ptr->d_buffer[j]) *= t; } MLOG_flop(list_data_ptr->length); } /* send these columns out */ msg = NULL; while ((msg = BMnext_msg(to_phase,msg)) != NULL) { CHKERRN(0); data_ptr = BMget_user(msg,&data_len); CHKERRN(0); gnum = data_ptr[0]; MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr); if (found) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); } else { MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n"); } BMset_msg_ptr(msg,list_data_ptr->d_buffer); CHKERRN(0); BMset_msg_size(msg,list_data_ptr->buf_size); CHKERRN(0); BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0); BMset_msg_ptr(msg,NULL); CHKERRN(0); } CHKERRN(0); BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0); /* retrieve my part of the column and put it in the data structure */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->length <= 0) continue; in_ind = list_data_ptr->in_ind; length = inodes[in_ind].length; nzptr = inodes[in_ind].nz; d_buffer = list_data_ptr->d_buffer; for (k=0;klength; } /* now update the remaining matrix using the local columns */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); if (list_data_ptr->length <= 0) continue; in_ind = list_data_ptr->in_ind; BSupdt_matrix1(inode_tree,list_data_ptr->length, list_data_ptr->i_buffer,list_data_ptr->d_buffer, inodes[in_ind].length, inodes[in_ind].row_num,inodes[in_ind].nz, color2clique->numbers[i+1], color2clique->numbers[color2clique->length-1],clique2inode, inodes,procinfo); CHKERRN(0); } /* receive the incoming columns and update the remaining matrix */ /* strip out the "local" part and put it in the matrix */ while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) { CHKERRN(0); BMcheck_on_async_block(to_phase); CHKERRN(0); msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0); BMset_msg_ptr(msg,NULL); CHKERRN(0); data_ptr = BMget_user(msg,&msg_len); CHKERRN(0); cl_ind = data_ptr[0]; in_ind = data_ptr[1]; /* let gnum be first global row location of my part */ gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]]; length = inodes[in_ind].length; msg_len = BMget_msg_size(msg); CHKERRN(0); msg_len /= ((sizeof(FLOAT))+sizeof(int)); /* find my local part */ i_buffer = (int *) &(msg_buf[msg_len]); for (ind=0;indnumbers[i+1], color2clique->numbers[color2clique->length-1],clique2inode, inodes,procinfo); CHKERRN(0); } CHKERRN(0); BMfree_block_msg(from_phase); CHKERRN(0); BMfinish_on_async_block(to_phase); CHKERRN(0); /* now free up the list */ MY_FIRST_IN_TREE(recv_tree,tree_node_ptr); while (! IS_TREE_PTR_NULL(tree_node_ptr)) { list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr); MY_NEXT_IN_TREE(tree_node_ptr); MY_FREE(list_data_ptr->d_buffer); } MY_FREE_TREE(recv_tree); } MY_FREE_TREE(inode_tree); BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0); BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0); return(0); } /*+ BSupdt_matrix1 - Update the matrix using a bunch of i-nodes Input Parameters: I don't really know what these things do now. Output Parameters: I don't really know what these things do now. Returns: void Notes: This code is very complicated and could be done much better. +*/ void BSupdt_matrix1(BStree inode_tree,int len,int *updt_index, FLOAT *updt_inode,int len2,int *my_inode_ind,FLOAT *my_inode, int start,int end,BScl_2_inode *clique2inode,BSinode *inodes, BSprocinfo *procinfo) { int i, j; int updt_count, offset, toffset; int cl_ind, in_ind; BSpermutation *iperm; FLOAT *work, *matrix; FLOAT work2; int *intptr; FLOAT *nzptr; int tcount, count2, gnum, size; int found; BStree_ptr tree_node_ptr; int *tree_data_ptr; /* sort the updt_inode */ iperm = BSalloc_permutation(len); CHKERR(0); for (i=0;iperm[i] = i; BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0); MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1); BSiperm_dvec(updt_inode,work,iperm); CHKERR(0); for (j=0;jproc[cl_ind] == procinfo->my_id) { /***** update the clique (stored in UPPER triangle) */ gnum = clique2inode->g_offset[cl_ind]; offset = updt_index[updt_count]-gnum; matrix = clique2inode->d_mats[cl_ind].matrix; size = clique2inode->d_mats[cl_ind].size; tcount = updt_count; work2 = updt_inode[tcount]; while (tcount < len) { toffset = updt_index[tcount]-gnum; if (toffset < size) { matrix[(toffset*size)+offset] -= work2* updt_inode[tcount]; MLOG_flop(2); } else { break; } tcount++; } } /* end of clique update */ } } }