#include "BSprivate.h"
/*+ BSperm_rows - Permute the nonzeros in A in preparation for
                  creating the efficient parallel execution structure

    For each inode (group of consecutive rows in the permuted ordering,
    sized by distr->distribution[i]) the column indices of the group's
    first row are re-sorted by the new global number of each column, and
    the same reordering is then applied to the nonzero values of every
    row in the group.  New numbers of columns that are not local are
    fetched through a bulletin board.

    Input Parameters:
.   A - the sparse matrix
.   gnum - the new global numbering of the rows
.   perm - the inode permutation
.   distr - the inode distribution
.   procinfo - the usual processor stuff
.   keep - Indicates if certain structures should be kept for future
           permuting of this structure
.   max_row_len - the maximum length of any row in A; the pointer is
           handed to BSnonlocalnz before *max_row_len is read here, so
           the value is presumably filled in by that call (TODO confirm)

    Output Parameters:
.   A - the sparse matrix with permuted rows

    Returns:
    If keep is nonzero, a key array holding one sort-key vector per inode
    (used to perform the permutation quickly in the future); the caller
    owns it.  If keep is zero, NULL.

    Notes:
    Every CHKERRN(0) may leave the function early; work arrays that are
    live at that point are not released on that path.
 +*/
BSkey_arr *BSperm_rows(BSspmat *A, BSnumbering *gnum, BSpermutation *perm,
    BSdistribution *distr, BSprocinfo *procinfo, int keep, int *max_row_len)
{
    int num_nz;
    int count, ind;
    int i, j, k;
    BSpermutation *iperm;
    BSpermutation *tperm;
    int *key;
    BSkey_arr *key_arr;
    FLOAT *dwork;
    int *iwork;
    BSbb *trans_bb;
    int query_len, *addrs, *answs;
    void (*map)(int,int *,int *,BSprocinfo *,BSmapping *);
    BSsprow *row;
    int *colptr;
    FLOAT *nzptr;
    int *map_work;

    /* create inverse permutation */
    iperm = BSalloc_permutation(A->num_rows); CHKERRN(0);
    BSperm2iperm(perm,iperm); CHKERRN(0);

    /* find the number of nonlocal nonzeros */
    /* (*max_row_len is used below to size the per-row work arrays) */
    num_nz = BSnonlocalnz(A,max_row_len,procinfo); CHKERRN(0);
    MY_MALLOCN(map_work,(int *),sizeof(int)*(*max_row_len),0);

    /* set up query: collect, in traversal order, every column of the */
    /* first row of each inode whose fglobal2proc result is not this */
    /* processor's id */
    map = A->map->fglobal2proc;
    MY_MALLOCN(addrs,(int *),sizeof(int)*num_nz,3);
    query_len = 0;
    count = 0;
    for (i=0;i<distr->max+1;i++) {
        /* first row of inode i; count then skips to the next inode */
        ind = iperm->perm[count];
        count += distr->distribution[i];
        row = A->rows[ind];
        colptr = row->col;
        (*map)(row->length,colptr,map_work,procinfo,A->map); CHKERRN(0);
        for (j=0;j<row->length;j++) {
            if (map_work[j] != procinfo->my_id) {
                addrs[query_len] = colptr[j];
                query_len++;
            }
        }
    }

    /* set up and query bulletin board: post the new numbers of the */
    /* local rows, then look up the new numbers of the nonlocal columns */
    trans_bb = BSinit_bb(A->num_rows,A->map);CHKERRN(0);
    BSpost_noaddr_bb(trans_bb,A->num_rows,gnum->numbers); CHKERRN(0);
    MY_MALLOCN(answs,(int *),sizeof(int)*num_nz,4);
    BSquery_match_bb(trans_bb,query_len,addrs,answs,procinfo); CHKERRN(0);
    MY_FREEN(addrs);
    BSfree_bb(trans_bb); CHKERRN(0);

    /* now, do the sorting */
    MY_MALLOCN(dwork,(FLOAT *),sizeof(FLOAT)*(*max_row_len),1);
    MY_MALLOCN(iwork,(int *),sizeof(int)*(*max_row_len),2);

    /* if keep is true, then allocate array of keys (one key vector per */
    /* inode is allocated inside the loop); otherwise a single scratch */
    /* key vector is reused for every inode */
    if (keep) {
        key_arr = BSalloc_key_arr(distr->max+1); CHKERRN(0);
    } else {
        key_arr = NULL;
        MY_MALLOCN(key,(int *),sizeof(int)*(*max_row_len),5);
    }

    /* answs is consumed in the same order in which addrs was filled, */
    /* so query_len restarts at zero */
    query_len = 0;
    count = 0;
    map = A->map->fglobal2local;
    tperm = BSalloc_permutation((*max_row_len)); CHKERRN(0);
    for (i=0;i<distr->max+1;i++) {
        ind = iperm->perm[count];
        row = A->rows[ind];
        colptr = row->col;
        if (keep) {
            /* this key vector is handed over to key_arr */
            MY_MALLOCN(key,(int *),sizeof(int)*row->length,5);
            key_arr->array[i] = key;
        }

        /* build the sort key: the new global number of each column; */
        /* a negative fglobal2local result is treated as "not local" */
        /* and takes the next bulletin board answer */
        (*map)(row->length,colptr,map_work,procinfo,A->map); CHKERRN(0);
        for (j=0;j<row->length;j++) {
            if (map_work[j] < 0) {
                key[j] = answs[query_len];
                query_len++;
            } else {
                key[j] = gnum->numbers[map_work[j]];
            }
        }

        /* start from the identity permutation */
        BSreset_permutation(row->length,tperm); CHKERRN(0);
        for (j=0;j<row->length;j++) tperm->perm[j] = j;

        /* now find the inverse permutation according to the new numbers */
        BSheap_sort1(row->length,key,tperm->perm); CHKERRN(0);

        /* permute the column values into iwork and then copy them */
        /* into the correct places */
        /* also create a floating point work vector */
        BSiperm_ivec(colptr,iwork,tperm); CHKERRN(0);
        for (k=0;k<row->length;k++) {
            colptr[k] = iwork[k];
        }

        /* apply the same reordering to the nonzero values of every row */
        /* in this inode; count ends up at the first row of the next one */
        for (j=0;j<distr->distribution[i];j++) {
            ind = iperm->perm[count];
            row = A->rows[ind];
            nzptr = row->nz;
            BSiperm_dvec(nzptr,dwork,tperm); CHKERRN(0);
            for (k=0;k<row->length;k++) {
                nzptr[k] = dwork[k];
            }
            count++;
        }
    }
    BSfree_permutation(tperm); CHKERRN(0);

    /* the scratch key vector exists only when the keys are not kept; */
    /* use the same ...N macro variant as every other free in this */
    /* pointer-returning function */
    if (!keep) {
        MY_FREEN(key);
    }
    MY_FREEN(map_work);
    MY_FREEN(dwork);
    MY_FREEN(iwork);
    MY_FREEN(answs);
    BSfree_permutation(iperm); CHKERRN(0);
    return(key_arr);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */