#include "pdsp_defs.h" void pdgstrf_thread_finalize(pdgstrf_threadarg_t *pdgstrf_threadarg, pxgstrf_shared_t *pxgstrf_shared, SuperMatrix *A, int *perm_r, SuperMatrix *L, SuperMatrix *U ) { /* * -- SuperLU MT routine (version 1.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. * August 15, 1997 * * Purpose * ======= * * pdgstrf_thread_finalize() performs cleanups after the multithreaded * factorization pdgstrf_thread(). It sets up the L and U data * structures, and deallocats the storage associated with the structures * pxgstrf_shared and pdgstrf_threadarg. * * Arguments * ========= * * pdgstrf_threadarg (input) pdgstrf_threadarg_t* * The structure contains the parameters to each thread. * * pxgstrf_shared (input) pxgstrf_shared_t* * The structure contains the shared task queue, the * synchronization variables, and the L and U data structures. * * A (input) SuperMatrix* * Original matrix A, permutated by columns, of dimension * (A->nrow, A->ncol). The type of A can be: * Stype = NCP; Dtype = _D; Mtype = GE. * * perm_r (input) int*, dimension A->nrow * Row permutation vector which defines the permutation matrix Pr, * perm_r[i] = j means row i of A is in position j in Pr*A. * * L (output) SuperMatrix* * The factor L from the factorization Pr*A=L*U; use compressed row * subscripts storage for supernodes, i.e., L has type: * Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise * storage scheme, i.e., U has type: * Stype = NCP, Dtype = _D, Mtype = TRU. * * */ register int nprocs, n, i, iinfo; int nnzL, nnzU; pdgstrf_options_t *pdgstrf_options; GlobalLU_t *Glu; extern ExpHeader *expanders; n = A->ncol; pdgstrf_options = pdgstrf_threadarg->pdgstrf_options; Glu = pxgstrf_shared->Glu; Glu->supno[n] = Glu->nsuper; countnz(n, pxgstrf_shared->xprune, &nnzL, &nnzU, Glu); fixupL(n, perm_r, Glu); #ifdef COMPRESS_LUSUP compressSUP(n, pxgstrf_shared->Glu); #endif if ( pdgstrf_options->refact == YES ) { /* L and U structures may have changed due to possibly different pivoting, although the storage is available. */ ((SCPformat *)L->Store)->nnz = nnzL; ((SCPformat *)L->Store)->nsuper = Glu->supno[n]; ((NCPformat *)U->Store)->nnz = nnzU; } else { dCreate_SuperNode_Permuted(L, A->nrow, A->ncol, nnzL, Glu->lusup, Glu->xlusup, Glu->xlusup_end, Glu->lsub, Glu->xlsub, Glu->xlsub_end, Glu->supno, Glu->xsup, Glu->xsup_end, SLU_SCP, SLU_D, SLU_TRLU); dCreate_CompCol_Permuted(U, A->nrow, A->ncol, nnzU, Glu->ucol, Glu->usub, Glu->xusub, Glu->xusub_end, SLU_NCP, SLU_D, SLU_TRU); } /* Combine the INFO returned from individual threads. */ iinfo = 0; nprocs = pdgstrf_options->nprocs; for (i = 0; i < nprocs; ++i) { if ( pdgstrf_threadarg[i].info ) { if ( iinfo ) iinfo = MIN(iinfo, pdgstrf_threadarg[i].info); else iinfo = pdgstrf_threadarg[i].info; } } *pxgstrf_shared->info = iinfo; #if ( DEBUGlevel>=2 ) printf("Last nsuper %d\n", Glu->nsuper); QueryQueue(&pxgstrf_shared->taskq); PrintGLGU(n, pxgstrf_shared->xprune, Glu); PrintInt10("perm_r", n, perm_r); PrintInt10("inv_perm_r", n, pxgstrf_shared->inv_perm_r); #endif /* Deallocate the storage used by the parallel scheduling algorithm. */ ParallelFinalize(pxgstrf_shared); SUPERLU_FREE(pdgstrf_threadarg); SUPERLU_FREE(pxgstrf_shared->inv_perm_r); SUPERLU_FREE(pxgstrf_shared->inv_perm_c); SUPERLU_FREE(pxgstrf_shared->xprune); SUPERLU_FREE(pxgstrf_shared->ispruned); SUPERLU_FREE(expanders); expanders = 0; #if ( DEBUGlevel>=1 ) printf("** pdgstrf_thread_finalize() called\n"); #endif }