#include "pdsp_defs.h"

void
pdgstrf_thread_finalize(pdgstrf_threadarg_t *pdgstrf_threadarg, 
			pxgstrf_shared_t *pxgstrf_shared,
			SuperMatrix *A, int *perm_r,
			SuperMatrix *L, SuperMatrix *U
			)
{
/*
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * Purpose
 * =======
 * 
 * pdgstrf_thread_finalize() performs cleanups after the multithreaded 
 * factorization pdgstrf_thread(). It sets up the L and U data
 * structures, and deallocats the storage associated with the structures
 * pxgstrf_shared and pdgstrf_threadarg.
 *
 * Arguments
 * =========
 *
 * pdgstrf_threadarg (input) pdgstrf_threadarg_t*
 *          The structure contains the parameters to each thread.
 *
 * pxgstrf_shared (input) pxgstrf_shared_t*
 *          The structure contains the shared task queue, the 
 *          synchronization variables, and the L and U data structures.
 *
 * A        (input) SuperMatrix*
 *	    Original matrix A, permutated by columns, of dimension
 *          (A->nrow, A->ncol). The type of A can be:
 *          Stype = NCP; Dtype = _D; Mtype = GE.
 *
 * perm_r   (input) int*, dimension A->nrow
 *          Row permutation vector which defines the permutation matrix Pr,
 *          perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 * L        (output) SuperMatrix*
 *          The factor L from the factorization Pr*A=L*U; use compressed row 
 *          subscripts storage for supernodes, i.e., L has type: 
 *          Stype = SCP, Dtype = _D, Mtype = TRLU.
 *
 * U        (output) SuperMatrix*
 *	    The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
 *          storage scheme, i.e., U has type:
 *          Stype = NCP, Dtype = _D, Mtype = TRU.
 *
 *
 */
    register int nprocs, n, i, iinfo;
    int       nnzL, nnzU;
    pdgstrf_options_t *pdgstrf_options;
    GlobalLU_t *Glu;
    extern ExpHeader *expanders;

    n = A->ncol;
    pdgstrf_options = pdgstrf_threadarg->pdgstrf_options;
    Glu = pxgstrf_shared->Glu;
    Glu->supno[n] = Glu->nsuper;

    countnz(n, pxgstrf_shared->xprune, &nnzL, &nnzU, Glu);
    fixupL(n, perm_r, Glu);
    
#ifdef COMPRESS_LUSUP
    compressSUP(n, pxgstrf_shared->Glu);
#endif

    if ( pdgstrf_options->refact == YES ) {
        /* L and U structures may have changed due to possibly different
	   pivoting, although the storage is available. */
        ((SCPformat *)L->Store)->nnz = nnzL;
	((SCPformat *)L->Store)->nsuper = Glu->supno[n];
	((NCPformat *)U->Store)->nnz = nnzU;
    } else {
	dCreate_SuperNode_Permuted(L, A->nrow, A->ncol, nnzL, Glu->lusup,
				   Glu->xlusup, Glu->xlusup_end, 
				   Glu->lsub, Glu->xlsub, Glu->xlsub_end,
				   Glu->supno, Glu->xsup, Glu->xsup_end,
				   SLU_SCP, SLU_D, SLU_TRLU);
	dCreate_CompCol_Permuted(U, A->nrow, A->ncol, nnzU, Glu->ucol,
				 Glu->usub, Glu->xusub, Glu->xusub_end,
				 SLU_NCP, SLU_D, SLU_TRU);
    }

    /* Combine the INFO returned from individual threads. */
    iinfo = 0;
    nprocs = pdgstrf_options->nprocs;
    for (i = 0; i < nprocs; ++i) {
        if ( pdgstrf_threadarg[i].info ) {
	    if ( iinfo ) iinfo = MIN(iinfo, pdgstrf_threadarg[i].info);
	    else iinfo = pdgstrf_threadarg[i].info;
	}
    }
    *pxgstrf_shared->info = iinfo;

#if ( DEBUGlevel>=2 )
    printf("Last nsuper %d\n", Glu->nsuper);
    QueryQueue(&pxgstrf_shared->taskq);
    PrintGLGU(n, pxgstrf_shared->xprune, Glu);
    PrintInt10("perm_r", n, perm_r);
    PrintInt10("inv_perm_r", n, pxgstrf_shared->inv_perm_r);
#endif

    /* Deallocate the storage used by the parallel scheduling algorithm. */
    ParallelFinalize(pxgstrf_shared);
    SUPERLU_FREE(pdgstrf_threadarg);
    SUPERLU_FREE(pxgstrf_shared->inv_perm_r);
    SUPERLU_FREE(pxgstrf_shared->inv_perm_c);
    SUPERLU_FREE(pxgstrf_shared->xprune);
    SUPERLU_FREE(pxgstrf_shared->ispruned);
    SUPERLU_FREE(expanders);
    expanders = 0;

#if ( DEBUGlevel>=1 )
    printf("** pdgstrf_thread_finalize() called\n");
#endif
}


syntax highlighted by Code2HTML, v. 0.9.1