#include "BSprivate.h"

/* a data structure used only in factoring */
/* nodes in a list of i-nodes that need to be received/sent during */
/* factorization */
typedef struct __list_type {
  int	length;		/* length of the columns in the i-node */
  int	num_cols;	/* # of columns in the i-node */
  int	cur_len;	/* don't know anymore */
  FLOAT	*d_buffer;	/* don't know anymore */
  int	*i_buffer;	/* don't know anymore */
  int	buf_size;	/* don't know anymore */
  int	cl_ind; 	/* index of the clique, if local */
  int	in_ind; 	/* index of the i-node if local */
} list_type;

#include "BStree.h"

extern void BSilu_updt_matrix(BSpar_mat *, int, BStree, BStree, int, int, int *,
     FLOAT *, int , int *, FLOAT *, int , int , BScl_2_inode *, BSinode *,
     BSprocinfo *);

/*+ BSilu_factorn - Compute the incomplete LU factor of a matrix

    Input Parameters:
.   A - The sparse matrix to be factored
.   comm - the communication structure of the factoring
.   procinfo - the usual processor info

    Output Parameters:
.   A - The factored sparse matrix

    Returns:
    0 if successful, otherwise a negative number whose absolute
    value is the row number of the color (less one) where the
    failure occured.

 +*/

typedef struct __col_list {
  int   count;
  int   *col;
  FLOAT **nz;
} col_list;

int BSilu_factorn(BSpar_mat *A, BScomm *comm, BSprocinfo *procinfo)
{
  BMphase *to_phase, *from_phase;
  BMmsg *msg;
  int  i, j, k;
  int  cl_ind, in_ind;
  int  count, size, ind;
  int  length, num_cols, gnum, offset;
  int  found;
  int  *intptr, *intptr2, *i_buffer;
  FLOAT *d_buffer;
  BScl_2_inode *clique2inode;
  BSnumbering *color2clique;
  BSinode *inodes;
  int  *data_ptr, data_len, msg_len;
  FLOAT *msg_buf, *matrix;
  FLOAT *nzptr;
  char TR = 'N';
  FLOAT one = 1.0;
  int  info, fact_error;
  list_type *list_data_ptr;
  BStree_ptr  tree_node_ptr;
  BStree  recv_tree;
  BStree  inode_tree;
  int  dummy = 0;
  int  *g_row_num, *iperm;

  int        l, max_size, dd_size, *ipiv, *ii_buffer;
  FLOAT      zero=0.0, *work, *dd_buffer, *temp;
  BStree     rowtree;
  BStree_ptr node_ptr;
  col_list   *col_list_ptr;


  BMinit_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
  BMinit_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
  color2clique = A->color2clique;
  clique2inode = A->clique2inode;
  inodes = A->inodes->list;
  g_row_num = A->global_row_num->numbers;
  iperm = A->inv_perm->perm;

  /* max_size is added in the following part (ILU) */
  max_size = -1;
  MY_INIT_TREE(inode_tree,(sizeof(int)*2));
  for (i=0;i<color2clique->length-1;i++) {
    for (cl_ind=color2clique->numbers[i];
      cl_ind<color2clique->numbers[i+1];cl_ind++) {
      if (clique2inode->d_mats[cl_ind].size > max_size)
	max_size = clique2inode->d_mats[cl_ind].size;
      for (in_ind=clique2inode->inode_index[cl_ind];
        in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
        MY_INSERT_TREE_NODE(inode_tree,(inodes[in_ind].gcol_num),
          found,tree_node_ptr,dummy);
        data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
        data_ptr[0] = in_ind;
        data_ptr[1] = cl_ind;
      }
    }
  }
  MY_MALLOCN(work,(FLOAT *),max_size*sizeof(FLOAT),1);

  /* Create rowtree just like factor1 case. But take care of */
  /* the inode thing (ILU)                                   */
  MY_INIT_TREE(rowtree,sizeof(col_list));
  for (j=0; j<A->inodes->length; j++) {
    for (k=0; k<inodes[j].num_cols; k++) {
      for (i=0; i<inodes[j].length; i++) {
        MY_INSERT_TREE_NODE(rowtree,g_row_num[iperm[inodes[j].row_num[i]]],
			    found,node_ptr,dummy);
        col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
        if (found) {
	  col_list_ptr->count++;
        }
        else {
	  col_list_ptr->count = 1;
	  /* Recall that A->max_local_row_length is used to store max_row_len */
	  MY_MALLOCN(col_list_ptr->col,(int *),sizeof(int)*
			A->max_local_row_length,1);
	  MY_MALLOCN(col_list_ptr->nz,(FLOAT **),sizeof(FLOAT *)*
		    A->max_local_row_length,1);
        }
        col_list_ptr->col[col_list_ptr->count-1] = inodes[j].gcol_num+k;
        col_list_ptr->nz[col_list_ptr->count-1] = 
	  &(inodes[j].nz[k*inodes[j].length+i]);
      }
    }
  }

/*
MY_FIRST_IN_TREE(rowtree,node_ptr);
while (! IS_TREE_PTR_NULL(node_ptr)) {
  col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
  MY_NEXT_IN_TREE(node_ptr);
  printf("[%d]:rowtree list: ",procinfo->my_id);
  for (i=0; i<col_list_ptr->count; i++)
    printf(" %2d",col_list_ptr->col[i]);
  printf("\n");
}
*/

  fact_error = FALSE;
  /* now do this phase by phase */
  for (i=0;i<color2clique->length-1;i++) {
    to_phase = BMget_phase(comm->to_msg,i); CHKERRN(0);
    from_phase = BMget_phase(comm->from_msg,i); CHKERRN(0);

    /* send my part of each column in this color */
    /* use the "from" part here */
    msg = NULL;
    while ((msg = BMnext_msg(from_phase,msg)) != NULL) { 
      CHKERRN(0);
      /* allocate space for the message */
      data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
      in_ind = data_ptr[1];
      size = inodes[in_ind].length*inodes[in_ind].num_cols*sizeof(FLOAT)
        + inodes[in_ind].length*sizeof(int);
      MY_MALLOCN(msg_buf,(FLOAT *),size,1);
      BMset_msg_ptr(msg,msg_buf); CHKERRN(0);
      BMset_msg_size(msg,size); CHKERRN(0);
      count = 0;
      for (j=0;j<inodes[in_ind].num_cols;j++) {
        nzptr = &(inodes[in_ind].nz[j*inodes[in_ind].length]);
        for (k=0;k<inodes[in_ind].length;k++) {
          msg_buf[count] = nzptr[k];
          count++;
        }
      }
      intptr = (int *) &(msg_buf[count]);
      intptr2 = (int *) inodes[in_ind].row_num;
      for (j=0;j<inodes[in_ind].length;j++) {
        intptr[j] = g_row_num[iperm[intptr2[j]]];
      }
      BMsend_block_msg(from_phase,msg,procinfo); CHKERRN(0);
      MY_FREE(msg_buf);
      BMset_msg_ptr(msg,NULL); CHKERRN(0);
    }
    CHKERRN(0);
    BMsend_block_msg(from_phase,NULL,procinfo); CHKERRN(0);

    /* factor the cliques in this color */
    /* the following is modified (ILU)  */
    for (cl_ind=color2clique->numbers[i];
      cl_ind<color2clique->numbers[i+1];cl_ind++) {
      if (procinfo->my_id == clique2inode->proc[cl_ind]) {
        /* factor this clique */
        /* stored in the upper diagonal */
        size = clique2inode->d_mats[cl_ind].size;
        matrix = clique2inode->d_mats[cl_ind].matrix;
	ipiv = (int *) &(matrix[size*size]);
	DGETRF(&size,&size,matrix,&size,ipiv,&info);
        MLOG_flop((size*size*size)/6);
        if (info != 0) fact_error = TRUE;
        /* now invert the clique */
        DGETRI(&size,matrix,&size,ipiv,work,&size,&info);
        MLOG_flop((size*size*size)/6);
        if (info != 0) fact_error = TRUE;
      }
    }

    /* figure out what I am going to receive and make room for it */
    /* use the "to" part here */
    MY_INIT_TREE(recv_tree,sizeof(list_type));
    msg = NULL;
    while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
      CHKERRN(0);
      /* search list for this number */
      data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
      gnum = data_ptr[0];
      length = data_ptr[1];
      num_cols = data_ptr[2];
      MY_INSERT_TREE_NODE(recv_tree,gnum,found,tree_node_ptr,dummy);
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      if (found) {
        list_data_ptr->length += length;
      } else {
        list_data_ptr->length = length;
        list_data_ptr->num_cols = num_cols;
        list_data_ptr->cur_len = 0;
        list_data_ptr->d_buffer = NULL;
        list_data_ptr->i_buffer = NULL;
        list_data_ptr->cl_ind = -1;
        list_data_ptr->in_ind = -1;
      }
    }
    CHKERRN(0);
    /* add any of my parts to the list, if necessary */
    for (cl_ind=color2clique->numbers[i];
      cl_ind<color2clique->numbers[i+1];cl_ind++) {
      /* only work on columns that I own */
      if (clique2inode->proc[cl_ind] != procinfo->my_id) continue;
      for (in_ind=clique2inode->inode_index[cl_ind];
        in_ind<clique2inode->inode_index[cl_ind+1];in_ind++) {
        MY_INSERT_TREE_NODE(recv_tree,(inodes[in_ind].gcol_num),
          found,tree_node_ptr,dummy);
        list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
        if (found) {
          list_data_ptr->cl_ind = cl_ind;
          list_data_ptr->in_ind = in_ind;
          list_data_ptr->length += inodes[in_ind].length;
        } else {
          list_data_ptr->length = inodes[in_ind].length;
          list_data_ptr->num_cols = inodes[in_ind].num_cols;
          list_data_ptr->cur_len = 0;
          list_data_ptr->d_buffer = NULL;
          list_data_ptr->i_buffer = NULL;
          list_data_ptr->cl_ind = cl_ind;
          list_data_ptr->in_ind = in_ind;
        }
      }
    }
    /* make sure that no one has been received out of order */
    if (procinfo->error_check) {
      MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
      while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
        list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
        MY_NEXT_IN_TREE(tree_node_ptr);
        if (list_data_ptr->cl_ind == -1) {
          MY_SETERRCN(FACTOR_ERROR,"Received column out of order\n");
        }
      }
    }

    /* allocate space for each whole column */
    MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
    while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      MY_NEXT_IN_TREE(tree_node_ptr);
      list_data_ptr->buf_size = 
        (list_data_ptr->num_cols*list_data_ptr->length*sizeof(FLOAT)) +
        (list_data_ptr->length*sizeof(int));
      MY_MALLOCN(list_data_ptr->d_buffer,(FLOAT *),
        list_data_ptr->buf_size,2);
      list_data_ptr->i_buffer = (int *)
        &(list_data_ptr->d_buffer[list_data_ptr->num_cols*
        list_data_ptr->length]);
    }

    /* put my part of the column into the collected column */
    MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
    while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      MY_NEXT_IN_TREE(tree_node_ptr);
      in_ind = list_data_ptr->in_ind;
      length = inodes[in_ind].length;
      num_cols = inodes[in_ind].num_cols;
      nzptr = inodes[in_ind].nz;
      d_buffer = list_data_ptr->d_buffer;
      for (j=0;j<num_cols;j++) {
        for (k=0;k<length;k++) {
          d_buffer[k] = nzptr[(j*length)+k];
        }
        d_buffer += list_data_ptr->length;
      }
      i_buffer = list_data_ptr->i_buffer;
      intptr = inodes[in_ind].row_num;
      /* while copying, convert i_buffer from new local to new global */
      for (j=0;j<length;j++) {
        i_buffer[j] = g_row_num[iperm[intptr[j]]];
      }
      list_data_ptr->cur_len += length;
    }

    /* collect all the parts of "my" columns */
    while ((msg = BMrecv_block_msg(to_phase,procinfo)) != NULL) {
      CHKERRN(0);
      BMcheck_on_async_block(from_phase); CHKERRN(0);
      msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
      BMset_msg_ptr(msg,NULL); CHKERRN(0);
      data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
      gnum = data_ptr[0];
      length = data_ptr[1];
      num_cols = data_ptr[2];
      MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
      if (found) {
        list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
        d_buffer = &(list_data_ptr->d_buffer[list_data_ptr->cur_len]);
        count = 0;
        for (j=0;j<num_cols;j++) {
          for (k=0;k<length;k++) {
            d_buffer[k] = msg_buf[count];
            count++;
          }
          d_buffer += list_data_ptr->length;
        }
        i_buffer = &(list_data_ptr->i_buffer[list_data_ptr->cur_len]);
        intptr = (int *) &(msg_buf[num_cols*length]);
        for (j=0;j<length;j++) {
          i_buffer[j] = intptr[j];
        }
        list_data_ptr->cur_len += length;
      } else {
        MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
      }
    }
    CHKERRN(0);
    BMfree_block_msg(to_phase); CHKERRN(0);
    BMfinish_on_async_block(from_phase); CHKERRN(0);

    /* see if the factorizations went okay, if not, then return */
    /* we have to wait until here to make sure all messages are received */
    /* make anything that needs free'ing is free'ed */
    /* everyone agrees on success/error */
    GISUM(&fact_error,1,&info,procinfo->procset);
    if (fact_error != 0) {
      /* free up space and return with the negative phase number */
      MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
      while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
        list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
        MY_NEXT_IN_TREE(tree_node_ptr);
        MY_FREE(list_data_ptr->d_buffer);
      }
      MY_FREE_TREE(recv_tree);
      return(-(i+1));
    }

    /* factor these columns */
    /* the following part is modified (ILU) */
    MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
    while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      MY_NEXT_IN_TREE(tree_node_ptr);
      if (list_data_ptr->length <= 0) continue;
      cl_ind = list_data_ptr->cl_ind;
      in_ind = list_data_ptr->in_ind;
      j = inodes[in_ind].num_cols;
      offset = inodes[in_ind].gcol_num - clique2inode->g_offset[cl_ind];
      size = clique2inode->d_mats[cl_ind].size;
      matrix = clique2inode->d_mats[cl_ind].matrix;
      matrix += ((offset*size)+offset);
      length = list_data_ptr->length;

      MY_MALLOCN(temp,(FLOAT *),length*j*sizeof(FLOAT),1);
      for (k=0; k<j*length; k++) temp[k] = list_data_ptr->d_buffer[k];

      DGEMM(&TR,&TR,&length,&j,&j,&one,temp,&length,
	     matrix,&size,&zero,list_data_ptr->d_buffer,&length);

      MY_FREE(temp);
      MLOG_flop(2*((j*(j+1))/2)*list_data_ptr->length*j);
    }

    /* send these columns out */
    /* the following part is modified (ILU) */
    msg = NULL;
    while ((msg = BMnext_msg(to_phase,msg)) != NULL) {
      CHKERRN(0);
      data_ptr = BMget_user(msg,&data_len); CHKERRN(0);
      gnum = data_ptr[0];
      MY_SEARCH_TREE(recv_tree,gnum,found,tree_node_ptr);
      if (found) {
	/* insert pivot row to the message (ILU) */
        list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
	length = list_data_ptr->length;
        num_cols = list_data_ptr->num_cols;

	dd_size = length*(2*sizeof(FLOAT)*num_cols+sizeof(int));
	MY_MALLOCN(dd_buffer,(FLOAT *),dd_size,1);
	for (j=0; j<num_cols*length; j++)
	  dd_buffer[j] = list_data_ptr->d_buffer[j];
        k = j;

	for (l=0; l<num_cols; l++) {
	  MY_SEARCH_TREE(rowtree,gnum+l,found,node_ptr);
	  if (found) {
	    col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
	    for (j=0; j<col_list_ptr->count; j++)
	      dd_buffer[k+j] = col_list_ptr->nz[j][0];
            k += col_list_ptr->count;
          }
	  else {
	    printf("Error: Something wrong with rowtree.\n");
          }
        }
        ii_buffer = (int *) &(dd_buffer[k]);
	for (j=0; j<length; j++)
	  ii_buffer[j] = list_data_ptr->i_buffer[j];

        BMset_msg_ptr(msg,dd_buffer); CHKERRN(0);
        BMset_msg_size(msg,dd_size); CHKERRN(0);
        BMsend_block_msg(to_phase,msg,procinfo); CHKERRN(0);
        BMset_msg_ptr(msg,NULL); CHKERRN(0);

        MY_FREE(dd_buffer);
      } else {
        MY_SETERRCN(FACTOR_ERROR,"Can't find matching column\n");
      }
    }
    CHKERRN(0);
    BMsend_block_msg(to_phase,NULL,procinfo); CHKERRN(0);

    /* retrieve my part of the column and put it in the data structure */
    /* the following part is modified (ILU) */
    MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
    while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      MY_NEXT_IN_TREE(tree_node_ptr);
      if (list_data_ptr->length <= 0) continue;
      in_ind = list_data_ptr->in_ind;
      cl_ind = list_data_ptr->cl_ind;
      length = inodes[in_ind].length;
      num_cols = inodes[in_ind].num_cols;
      nzptr = inodes[in_ind].nz;
      d_buffer = list_data_ptr->d_buffer;
      i_buffer = list_data_ptr->i_buffer;
      for (j=0;j<num_cols;j++) {
        for (k=0;k<length;k++) {
	  if (i_buffer[k] > clique2inode->g_offset[cl_ind])
            nzptr[(j*length)+k] = d_buffer[k];
        }
        d_buffer += list_data_ptr->length;
      }
    }

    /* now update the remaining matrix using the local columns */
    /* the following part is modified (ILU) */
    MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
    while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      MY_NEXT_IN_TREE(tree_node_ptr);
      if (list_data_ptr->length <= 0) continue;
      in_ind = list_data_ptr->in_ind;
      gnum = inodes[in_ind].gcol_num;

      MY_MALLOCN(dd_buffer,(FLOAT *),list_data_ptr->length*
		list_data_ptr->num_cols*sizeof(FLOAT),1);
      k = 0;
      for (l=0; l<list_data_ptr->num_cols; l++) {
        MY_SEARCH_TREE(rowtree,gnum+l,found,node_ptr);
        if (found) {
         col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
         for (j=0; j<col_list_ptr->count; j++)
           dd_buffer[k+j] = col_list_ptr->nz[j][0];
         k += col_list_ptr->count;
        }
	else {
	  printf("Error: Something wrong with rowtree.\n");
        }
      }

      BSilu_updt_matrix(A,gnum,rowtree,
	inode_tree,list_data_ptr->length,
        list_data_ptr->num_cols,
        list_data_ptr->i_buffer,list_data_ptr->d_buffer,
        col_list_ptr->count,col_list_ptr->col,dd_buffer,
        color2clique->numbers[i+1],
        color2clique->numbers[color2clique->length-1],clique2inode,
        inodes,procinfo); CHKERRN(0);

      MY_FREE(dd_buffer);
    }

    /* receive the incoming columns and update the remaining matrix */
    /* strip out the "local" part and put it in the matrix */
    /* the following part is modified (ILU) */
    while ((msg = BMrecv_block_msg(from_phase,procinfo)) != NULL) {
      CHKERRN(0);
      BMcheck_on_async_block(to_phase); CHKERRN(0);
      msg_buf = (FLOAT *) BMget_msg_ptr(msg); CHKERRN(0);
      BMset_msg_ptr(msg,NULL); CHKERRN(0);
      data_ptr = BMget_user(msg,&msg_len); CHKERRN(0);
      cl_ind = data_ptr[0];
      in_ind = data_ptr[1];
      /* let gnum be first global row location of my part */
      gnum = g_row_num[iperm[inodes[in_ind].row_num[0]]];
      num_cols = inodes[in_ind].num_cols;
      length = inodes[in_ind].length;
      msg_len = BMget_msg_size(msg); CHKERRN(0);
      msg_len /= 2*sizeof(FLOAT)*num_cols+sizeof(int);

      /* find my local part */
      i_buffer = (int *) &(msg_buf[2*num_cols*msg_len]);
      for (ind=0;ind<msg_len;ind++) {
        if (gnum == i_buffer[ind]) break;
      }
      nzptr = inodes[in_ind].nz;
      d_buffer = &(msg_buf[ind]);
      for (j=0;j<num_cols;j++) {
        for (k=0;k<length;k++) {
	  if (i_buffer[ind+k] > clique2inode->g_offset[cl_ind])
            nzptr[(j*length)+k] = d_buffer[k];
        }
        d_buffer += msg_len;
      }

      /* update the remaining matrix */
      BSilu_updt_matrix(A,inodes[in_ind].gcol_num,rowtree,
	inode_tree,msg_len,num_cols,i_buffer,msg_buf,msg_len,
        i_buffer,&(msg_buf[msg_len*num_cols]),color2clique->numbers[i+1],
        color2clique->numbers[color2clique->length-1],clique2inode,
        inodes,procinfo); CHKERRN(0);
    }
    CHKERRN(0);
    BMfree_block_msg(from_phase); CHKERRN(0);
    BMfinish_on_async_block(to_phase); CHKERRN(0);

    /* now free up the list */
    MY_FIRST_IN_TREE(recv_tree,tree_node_ptr);
    while (! IS_TREE_PTR_NULL(tree_node_ptr)) {
      list_data_ptr = (list_type *) MY_GET_TREE_DATA(tree_node_ptr);
      MY_NEXT_IN_TREE(tree_node_ptr);
      MY_FREE(list_data_ptr->d_buffer);
    }
    MY_FREE_TREE(recv_tree);
  }

  /* now free the rowtree and other stuff (ILU) */
  MY_FIRST_IN_TREE(rowtree,node_ptr);
  while (! IS_TREE_PTR_NULL(node_ptr)) {
    col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
    MY_NEXT_IN_TREE(node_ptr);
    MY_FREE(col_list_ptr->col);
    MY_FREE(col_list_ptr->nz);
  }
  MY_FREE_TREE(rowtree);
  MY_FREE_TREE(inode_tree);
  MY_FREE(work);

  BMfinish_comp_msg(comm->from_msg,procinfo); CHKERRN(0);
  BMfinish_comp_msg(comm->to_msg,procinfo); CHKERRN(0);
  return(0);
}

/*+ BSilu_updt_matrix - Update the matrix using a bunch of i-nodes 

    Input Parameters:
    I don't really know what these things do now.

    Output Parameters:
    I don't really know what these things do now.

    Returns:
    void

    Notes:
    This code is very complicated and could be done much better.

+*/
void BSilu_updt_matrix(BSpar_mat *A, int colnum, BStree rowtree,
BStree inode_tree, int len, int num_cols, int *updt_index,
FLOAT *updt_inode, int len2, int *my_inode_ind, FLOAT *my_inode,
int start, int end, BScl_2_inode *clique2inode, BSinode *inodes,
BSprocinfo *procinfo)
{
  int  i, j;
  int  updt_count, offset, toffset;
  int  cl_ind, in_ind;
  BSpermutation *iperm;
  FLOAT  *work, *matrix;
  int  *intptr;
  FLOAT  *nzptr;
  int  tcount, count2, gnum, size;
  int  found;
  BStree_ptr  tree_node_ptr;
  int  *tree_data_ptr;

  int        k, l, *rowptr, *row_gnum, *g_iperm;
  BStree_ptr node_ptr;
  col_list   *col_list_ptr;
  FLOAT      one = 1.0, none = -1.0;
  char       TR = 'T', NTR = 'N';

  /* sort the updt_inode */
  iperm = BSalloc_permutation(len); CHKERR(0);
  for (i=0;i<len;i++) iperm->perm[i] = i;
  BSheap_sort1(len,updt_index,iperm->perm); CHKERR(0);
  MY_MALLOC(work,(FLOAT *),sizeof(FLOAT)*len,1);
  for (i=0;i<num_cols;i++) {
    BSiperm_dvec(&(updt_inode[i*len]),work,iperm); CHKERR(0);
    for (j=0;j<len;j++) updt_inode[(i*len)+j] = work[j];
  }
  MY_FREE(work);
  BSfree_permutation(iperm); CHKERR(0);

  row_gnum = A->global_row_num->numbers;
  g_iperm = A->inv_perm->perm;

  updt_count = 0;
  while (updt_count < len) {
    MY_SEARCH_TREE(inode_tree,updt_index[updt_count],found,tree_node_ptr);
    if (found) {
      tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
      in_ind = tree_data_ptr[0];
      cl_ind = tree_data_ptr[1];

      /* make sure the nz are below pivot (ILU) */
      if (clique2inode->g_offset[cl_ind] > colnum) {
        /* update the inode */
        tcount = 0;
        count2 = 0;
        MY_SEARCH_TREE(rowtree,updt_index[updt_count],found,node_ptr);
	if (found) {
	  col_list_ptr = (col_list *) MY_GET_TREE_DATA(node_ptr);
	  intptr = col_list_ptr->col;
          while ((tcount<len2) && (count2<col_list_ptr->count)) {
            if ((my_inode_ind[tcount] == intptr[count2]) &
		(intptr[count2] > colnum)) {
              MY_SEARCH_TREE(inode_tree,intptr[count2],found,tree_node_ptr);
	      tree_data_ptr = (int *) MY_GET_TREE_DATA(tree_node_ptr);
              i = tree_data_ptr[0];
	      nzptr  = inodes[i].nz;
	      rowptr = inodes[i].row_num;
	      /* search the row that we are going to modify (ILU) */
	      for (j=0; j<inodes[i].length; j++)
		if (row_gnum[g_iperm[rowptr[j]]] == updt_index[updt_count])
		  break;

              /* usual Gaussian elimination step (ILU) */
	      if (j<inodes[i].length)
		DGEMM(&NTR,&TR,&(inodes[in_ind].num_cols),&(inodes[i].num_cols),
		     &num_cols,&none,&(updt_inode[updt_count]),&len,
		     &(my_inode[tcount]),&len2,&one,&(nzptr[j]),
		     &(inodes[i].length));

              MLOG_flop(2*num_cols*inodes[in_ind].num_cols);
              tcount += inodes[i].num_cols;
              count2 += inodes[i].num_cols;
            } else if (my_inode_ind[tcount] < intptr[count2]) {
              tcount++;
            } else {
              count2++;
            }
          }
          /* update a portion of the clique if it is local */
          gnum = clique2inode->g_offset[cl_ind];
          offset = updt_index[updt_count]-gnum;
          matrix = clique2inode->d_mats[cl_ind].matrix;
          size = clique2inode->d_mats[cl_ind].size;
	  for (j=0; j<len2; j++)
	    if (my_inode_ind[j] == updt_index[updt_count]) break;
          if (j==len2) {
	    printf("Error: The matrix is not structurally symmetric!\n");
          }
          for (k=0; k<inodes[in_ind].num_cols; k++) {
	    tcount = updt_count;
	    toffset = updt_index[updt_count+k]-gnum;
	    if (toffset < size) {
	      while ((tcount < len) & (updt_index[tcount]-gnum < size)) {
		for (l=0; l<num_cols; l++) {
		  matrix[toffset*size+offset+tcount-updt_count] -= 
		    my_inode[l*len2+j+k]*updt_inode[l*len+tcount];
                }
                MLOG_flop(2*num_cols);
	        tcount++;
              }
            }
          }
	  /* end of update clique */
        }
      }
      updt_count += inodes[in_ind].num_cols;
    } else {
      updt_count++;
    }
  }
}


syntax highlighted by Code2HTML, v. 0.9.1