#include "BSprivate.h"

/*+ BSperm_rows - Permute the nonzeros in A in preparation for
                  creating the efficient parallel execution structure

    Input Parameters:
.   A - the sparse matrix
.   gnum - the new global numbering of the rows
.   perm - the inode permutation
.   distr - the inode distribution
.   procinfo - the usual processor stuff
.   keep - Indicates if certain structures should be kept for
           future permuting of this structure
.   max_row_len - the maximum length of any row in A

    Output Parameters:
.   A - the sparse matrix with permuted rows

    Returns:
    a structure used to perform the permutation quickly in the future
    (NULL when keep is zero)

 +*/
BSkey_arr *BSperm_rows(BSspmat *A, BSnumbering *gnum, BSpermutation *perm,
	BSdistribution *distr, BSprocinfo *procinfo, int keep, int *max_row_len)
{
	int	num_nz;
	int	count, ind;
	int	i, j, k;
	BSpermutation *iperm;
	BSpermutation *tperm;
	int	*key;
	BSkey_arr	*key_arr;
	FLOAT	*dwork;
	int	*iwork;
	BSbb	*trans_bb;
	int	query_len, *addrs, *answs;
	void (*map)(int,int *,int *,BSprocinfo *,BSmapping *);
	BSsprow *row;
	int	*colptr;
	FLOAT	*nzptr;
	int	*map_work;

	/* create inverse permutation */
	iperm = BSalloc_permutation(A->num_rows); CHKERRN(0);
	BSperm2iperm(perm,iperm); CHKERRN(0);

	/* find the number of nonlocal nonzeros */
	/* NOTE(review): max_row_len is passed by address here and only */
	/* dereferenced afterwards, so BSnonlocalnz presumably fills it in; */
	/* confirm against BSnonlocalnz */
	num_nz = BSnonlocalnz(A,max_row_len,procinfo); CHKERRN(0);
	MY_MALLOCN(map_work,(int *),sizeof(int)*(*max_row_len),0);

	/* set up query: collect the column numbers whose owning processor */
	/* (according to fglobal2proc) is not this processor */
	map = A->map->fglobal2proc;
	MY_MALLOCN(addrs,(int *),sizeof(int)*num_nz,3);
	query_len = 0;
	count = 0;
	for (i=0;i<distr->max+1;i++) {
		/* only the first row of each group of distr->distribution[i] */
		/* rows is examined in this pass */
		ind = iperm->perm[count];
		count += distr->distribution[i];
		row = A->rows[ind];
		colptr = row->col;
		(*map)(row->length,colptr,map_work,procinfo,A->map); CHKERRN(0);
		for (j=0;j<row->length;j++) {
			if (map_work[j] != procinfo->my_id) {
				addrs[query_len] = colptr[j];
				query_len++;
			}
		}
	}

	/* set up and query bulletin board */
	trans_bb = BSinit_bb(A->num_rows,A->map);CHKERRN(0);
	BSpost_noaddr_bb(trans_bb,A->num_rows,gnum->numbers); CHKERRN(0);
	MY_MALLOCN(answs,(int *),sizeof(int)*num_nz,4);
	BSquery_match_bb(trans_bb,query_len,addrs,answs,procinfo); CHKERRN(0);
	MY_FREEN(addrs);
	BSfree_bb(trans_bb); CHKERRN(0);

	/* now, do the sorting */
	MY_MALLOCN(dwork,(FLOAT *),sizeof(FLOAT)*(*max_row_len),1);
	MY_MALLOCN(iwork,(int *),sizeof(int)*(*max_row_len),2);

	/* if keep is true, then allocate array of keys */
	/* otherwise a single scratch key buffer is reused for every row */
	if (keep) {
		key_arr = BSalloc_key_arr(distr->max+1); CHKERRN(0);
	} else {
		key_arr = NULL;
		MY_MALLOCN(key,(int *),sizeof(int)*(*max_row_len),5);
	}

	/* query_len now walks through answs; the rows are visited in the */
	/* same order as in the query pass above */
	query_len = 0;
	count = 0;
	map = A->map->fglobal2local;
	tperm = BSalloc_permutation((*max_row_len)); CHKERRN(0);
	for (i=0;i<distr->max+1;i++) {
		ind = iperm->perm[count];
		row = A->rows[ind];
		colptr = row->col;
		if (keep) {
			/* this key array is owned by key_arr and outlives the call */
			MY_MALLOCN(key,(int *),sizeof(int)*row->length,5);
			key_arr->array[i] = key;
		}
		(*map)(row->length,colptr,map_work,procinfo,A->map); CHKERRN(0);
		for (j=0;j<row->length;j++) {
			if (map_work[j] < 0) {
				/* negative local index: take the new number from the */
				/* bulletin board answers */
				key[j] = answs[query_len];
				query_len++;
			} else {
				key[j] = gnum->numbers[map_work[j]];
			}
		}
		BSreset_permutation(row->length,tperm); CHKERRN(0);
		for (j=0;j<row->length;j++) tperm->perm[j] = j;
		/* now find the inverse permutation according to the new numbers */
		BSheap_sort1(row->length,key,tperm->perm); CHKERRN(0);

		/* permute the column values into iwork and then copy them */
		/* into the correct places */
		/* also create a floating point work vector */
		BSiperm_ivec(colptr,iwork,tperm); CHKERRN(0);
		for (k=0;k<row->length;k++) {
			colptr[k] = iwork[k];
		}
		/* apply the same permutation to the nonzero values of each of */
		/* the distr->distribution[i] rows; count advances one row at a */
		/* time so it indexes the next group when this loop finishes */
		for (j=0;j<distr->distribution[i];j++) {
			ind = iperm->perm[count];
			row = A->rows[ind];
			nzptr = row->nz;
			BSiperm_dvec(nzptr,dwork,tperm); CHKERRN(0);
			for (k=0;k<row->length;k++) {
				nzptr[k] = dwork[k];
			}
			count++;
		}
	}
	BSfree_permutation(tperm); CHKERRN(0);

	if (!keep) {
		/* NOTE(review): every other release in this function uses */
		/* MY_FREEN; confirm MY_FREE is intended here */
		MY_FREE(key);
	}
	MY_FREEN(map_work);
	MY_FREEN(dwork);
	MY_FREEN(iwork);
	MY_FREEN(answs);
	BSfree_permutation(iperm); CHKERRN(0);

	return(key_arr);
}