#include "BSprivate.h"

/*+ BSperm_rows - Permute the nonzeros in A in preparation for
                  creating the efficient parallel execution structure

    Input Parameters:
.   A - the sparse matrix; the column index and nonzero value arrays
        of its rows are reordered in place
.   gnum - the new global numbering of the rows (gnum->numbers supplies
           the sort keys for locally owned columns and is posted so
           other processors can look up their nonlocal columns)
.   perm - the inode permutation (inverted internally)
.   distr - the inode distribution: distr->max+1 groups, with
            distr->distribution[i] consecutive rows in group i
.   procinfo - the usual processor stuff
.   keep - Indicates if certain structures should be kept for future
           permuting of this structure
.   max_row_len - pointer to the maximum length of any row in A; it is
                  handed to BSnonlocalnz and then read to size the
                  work buffers (presumably filled in by BSnonlocalnz -
                  confirm against that routine)

    Output Parameters:
.   A - the sparse matrix with permuted rows

    Returns:
    a structure used to perform the permutation quickly in the future
    (one key array per inode) when keep is nonzero; NULL when keep is
    zero

    Notes:
    The error macros used here (CHKERRN, MY_MALLOCN, MY_FREEN) are the
    variants intended for functions that return a value; see
    BSprivate.h for their exact behavior.

 +*/
BSkey_arr *BSperm_rows(BSspmat *A, BSnumbering *gnum, BSpermutation *perm,
	BSdistribution *distr, BSprocinfo *procinfo, int keep, int *max_row_len)
{
	int	num_nz;
	int	count, ind;
	int	i, j, k;
	BSpermutation *iperm;
	BSpermutation *tperm;
	int	*key;
	BSkey_arr *key_arr;
	FLOAT	*dwork;
	int	*iwork;
	BSbb *trans_bb;
	int	query_len, *addrs, *answs;
	void    (*map)(int,int *,int *,BSprocinfo *,BSmapping *);
	BSsprow *row;
	int	*colptr;
	FLOAT	*nzptr;
	int	*map_work;


	/* create inverse permutation */
	iperm = BSalloc_permutation(A->num_rows); CHKERRN(0);
	BSperm2iperm(perm,iperm); CHKERRN(0);

	/* find the number of nonlocal nonzeros */
	num_nz = BSnonlocalnz(A,max_row_len,procinfo); CHKERRN(0);
	/* scratch for the per-row mapping results, sized by the longest row */
	MY_MALLOCN(map_work,(int *),sizeof(int)*(*max_row_len),0);

	/* set up query: collect every column index, taken from the first */
	/* row of each inode, that maps to a processor other than this one */
	map = A->map->fglobal2proc;
	MY_MALLOCN(addrs,(int *),sizeof(int)*num_nz,3);
	query_len = 0;
	count = 0;
	for (i=0;i<distr->max+1;i++) {
		/* first row of inode i; count then skips to the next inode */
		ind = iperm->perm[count];
		count += distr->distribution[i];
		row = A->rows[ind];
		colptr = row->col;
		(*map)(row->length,colptr,map_work,procinfo,A->map); CHKERRN(0);
		for (j=0;j<row->length;j++) {
			if (map_work[j] != procinfo->my_id) {
				addrs[query_len] = colptr[j];
				query_len++;
			}
		}
	}

	/* set up and query bulletin board: post the new local numbers and */
	/* fetch the answers for the nonlocal columns collected above */
	trans_bb = BSinit_bb(A->num_rows,A->map);CHKERRN(0);
	BSpost_noaddr_bb(trans_bb,A->num_rows,gnum->numbers); CHKERRN(0);
	MY_MALLOCN(answs,(int *),sizeof(int)*num_nz,4);
	BSquery_match_bb(trans_bb,query_len,addrs,answs,procinfo); CHKERRN(0);
	MY_FREEN(addrs);
	BSfree_bb(trans_bb); CHKERRN(0);

	/* now, do the sorting */
	MY_MALLOCN(dwork,(FLOAT *),sizeof(FLOAT)*(*max_row_len),1);
	MY_MALLOCN(iwork,(int *),sizeof(int)*(*max_row_len),2);
	/* if keep is true, then allocate array of keys */
	/* otherwise a single scratch key buffer is reused for every inode */
	if (keep) {
		key_arr = BSalloc_key_arr(distr->max+1); CHKERRN(0);
	} else {
		key_arr = NULL;
		MY_MALLOCN(key,(int *),sizeof(int)*(*max_row_len),5);
	}
	/* query_len now walks through answs in the same order in which */
	/* the nonlocal columns were appended to addrs above */
	query_len = 0;
	count = 0;
	map = A->map->fglobal2local;
	tperm = BSalloc_permutation((*max_row_len)); CHKERRN(0);
	for (i=0;i<distr->max+1;i++) {
		ind = iperm->perm[count];
		row = A->rows[ind];
		colptr = row->col;
		if (keep) {
			/* this inode gets its own key array, owned by key_arr */
			MY_MALLOCN(key,(int *),sizeof(int)*row->length,5);
			key_arr->array[i] = key;
		}
		(*map)(row->length,colptr,map_work,procinfo,A->map); CHKERRN(0);
		for (j=0;j<row->length;j++) {
			if (map_work[j] < 0) {
				/* negative local index: take the number fetched */
				/* from the bulletin board */
				key[j] = answs[query_len];
				query_len++;
			} else {
				key[j] = gnum->numbers[map_work[j]];
			}
		}
		/* start from the identity permutation over this row */
		BSreset_permutation(row->length,tperm); CHKERRN(0);
		for (j=0;j<row->length;j++) {
			tperm->perm[j] = j;
		}

		/* now find the inverse permutation according to the new numbers */
		BSheap_sort1(row->length,key,tperm->perm); CHKERRN(0);

		/* permute the column values into iwork and then copy them */
		/* into the correct places */
		/* also create a floating point work vector */
		BSiperm_ivec(colptr,iwork,tperm); CHKERRN(0);
		for (k=0;k<row->length;k++) {
			colptr[k] = iwork[k];
		}
		/* apply the same permutation to the nonzero values of every */
		/* row in this inode; count advances one row at a time */
		for (j=0;j<distr->distribution[i];j++) {
			ind = iperm->perm[count];
			row = A->rows[ind];
			nzptr = row->nz;
			BSiperm_dvec(nzptr,dwork,tperm); CHKERRN(0);
			for (k=0;k<row->length;k++) {
				nzptr[k] = dwork[k];
			}
			count++;
		}
	}
	BSfree_permutation(tperm); CHKERRN(0);

	/* the scratch key buffer exists only when the keys are not kept; */
	/* use the value-returning free macro like every other release in */
	/* this function */
	if (!keep) {
		MY_FREEN(key);
	}
	MY_FREEN(map_work);
	MY_FREEN(dwork);
	MY_FREEN(iwork);
	MY_FREEN(answs);
	BSfree_permutation(iperm); CHKERRN(0);

	return(key_arr);
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */