/* ========================================================================== */
/* === klu_factor =========================================================== */
/* ========================================================================== */

/* Factor the matrix, after ordering and analyzing it with klu_analyze
 * or klu_analyze_given.
 */

#include "klu_internal.h"

/* ========================================================================== */
/* === klu_factor2 ========================================================== */
/* ========================================================================== */

static void klu_factor2
(
    /* inputs, not modified */
    int Ap [ ],		/* size n+1, column pointers */
    int Ai [ ],		/* size nz, row indices */
    Entry Ax [ ],
    klu_symbolic *Symbolic,

    /* inputs, modified on output: */
    klu_numeric *Numeric,
    klu_common *Common
)
{
    double lsize ;
    double *Lnz, *Rs ;
    int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork ;
    int **Lbip, **Ubip, **Lblen, **Ublen ;
    Entry *Offx, *Singleton, *X, s ;
    Unit **LUbx, **Udiag ;
    int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
	nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
	max_unz_block ;

    /* ---------------------------------------------------------------------- */
    /* initializations */
    /* ---------------------------------------------------------------------- */

    /* get the contents of the Symbolic object */
    n = Symbolic->n ;
    P = Symbolic->P ;
    Q = Symbolic->Q ;
    R = Symbolic->R ;
    Lnz = Symbolic->Lnz ;
    nblocks = Symbolic->nblocks ;
    nzoff = Symbolic->nzoff ;

    Pnum = Numeric->Pnum ;
    Offp = Numeric->Offp ;
    Offi = Numeric->Offi ;
    Offx = (Entry *) Numeric->Offx ;
    Singleton = (Entry *) Numeric->Singleton ;

    Lbip = Numeric->Lbip ;
    Ubip = Numeric->Ubip ;
    Lblen = Numeric->Lblen ;
    Ublen = Numeric->Ublen ;
    LUbx = (Unit **) Numeric->LUbx ;
    Udiag = (Unit **) Numeric->Udiag ;

    Rs = Numeric->Rs ;
    Pinv = Numeric->Pinv ;
    X = (Entry *) Numeric->Xwork ;		/* X is of size n */
    Iwork = Numeric->Iwork ;			/* 5*maxblock for klu_factor */
						/* 1*maxblock for Pblock */
    Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
    Common->nrealloc = 0 ;
    scale = Common->scale ;
    max_lnz_block = 1 ;
    max_unz_block = 1 ;

    /* compute the inverse of P from symbolic analysis.  Will be updated to
     * become the inverse of the numerical factorization when the factorization
     * is done, for use in klu_refactor */
#ifndef NDEBUG
    for (k = 0 ; k < n ; k++)
    {
	Pinv [k] = EMPTY ;
    }
#endif
    for (k = 0 ; k < n ; k++)
    {
	ASSERT (P [k] >= 0 && P [k] < n) ;
	Pinv [P [k]] = k ;
    }
#ifndef NDEBUG
    for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
#endif

    for (block = 0 ; block < nblocks ; block++)
    {
	/* Singleton [block] = 0 ; */
	CLEAR (Singleton [block]) ;
    }

    lnz = 0 ;
    unz = 0 ;
    Common->noffdiag = 0 ;
    Offp [0] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* optionally check input matrix and compute scale factors */
    /* ---------------------------------------------------------------------- */

    if (scale >= 0)
    {
	/* use Pnum as workspace */
	KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;
	if (Common->status < KLU_OK)
	{
	    /* matrix is invalid */
	    return ;
	}
    }

#ifndef NDEBUG
    if (scale > 0)
    {
	for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
    }
#endif

    /* ---------------------------------------------------------------------- */
    /* factor each block using klu */
    /* ---------------------------------------------------------------------- */

    for (block = 0 ; block < nblocks ; block++)
    {

	/* ------------------------------------------------------------------ */
	/* the block is from rows/columns k1 to k2-1 */
	/* ------------------------------------------------------------------ */

	k1 = R [block] ;
	k2 = R [block+1] ;
	nk = k2 - k1 ;
	PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;

	if (nk == 1)
	{

	    /* -------------------------------------------------------------- */
	    /* singleton case */
	    /* -------------------------------------------------------------- */

	    poff = Offp [k1] ;
	    oldcol = Q [k1] ;
	    pend = Ap [oldcol+1] ;
	    CLEAR (s) ;

	    if (scale <= 0)
	    {
		/* no scaling */
		for (p = Ap [oldcol] ; p < pend ; p++)
		{
		    oldrow = Ai [p] ;
		    newrow = Pinv [oldrow] ;
		    if (newrow < k1)
		    {
			Offi [poff] = oldrow ;
			Offx [poff] = Ax [p] ;
			poff++ ;
		    }
		    else
		    {
			ASSERT (newrow == k1) ;
			PRINTF (("Singleton block %d", block)) ;
			PRINT_ENTRY (Ax [p]) ;
			s = Ax [p] ;
		    }
		}
	    }
	    else
	    {
		/* row scaling */
		for (p = Ap [oldcol] ; p < pend ; p++)
		{
		    oldrow = Ai [p] ;
		    newrow = Pinv [oldrow] ;
		    if (newrow < k1)
		    {
			Offi [poff] = oldrow ;
			/*Offx [poff] = Ax [p] / Rs [oldrow] ; */
			SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
			poff++ ;
		    }
		    else
		    {
			ASSERT (newrow == k1) ;
			PRINTF (("Singleton block %d ", block)) ;
			PRINT_ENTRY (Ax[p]) ;
			SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
		    }
		}
	    }

	    Singleton [block] = s ;

	    if (IS_ZERO (s))
	    {
		/* singular singleton */
		Common->status = KLU_SINGULAR ;
		Common->numerical_rank = k1 ;
		Common->singular_col = oldcol ;
		if (Common->halt_if_singular)
		{
		    return ;
		}
	    }

	    Offp [k1+1] = poff ;
	    Pnum [k1] = P [k1] ;
	    lnz++ ;
	    unz++ ;

	}
	else
	{

	    /* -------------------------------------------------------------- */
	    /* construct and factorize the kth block */
	    /* -------------------------------------------------------------- */

	    if (Lnz [block] < 0)
	    {
		/* COLAMD was used - no estimate of fill-in */
		/* use 10 times the nnz in A, plus n */
		lsize = -(Common->initmem) ;
	    }
	    else
	    {
		lsize = Common->initmem_amd * Lnz [block] + nk ;
	    }

	    /* allocates 5 arrays:
	     * Lbip [block], Ubip [block], Lblen [block], Ublen [block],
	     * LUbx [block] */
	    KLU_kernel_factor (nk, Ap, Ai, Ax, Q, lsize,
		    &LUbx [block], Udiag [block], Lblen [block], Ublen [block],
		    Lbip [block], Ubip [block], Pblock, &lnz_block, &unz_block,
		    X, Iwork,
		    /* BTF and scale-related arguments: */
		    k1, Pinv, Rs, Offp, Offi, Offx, Common) ;

	    if (Common->status < KLU_OK ||
	       (Common->status == KLU_SINGULAR && Common->halt_if_singular))
	    {
		/* out of memory, invalid inputs, or singular */
		return ;
	    }

	    PRINTF (("\n----------------------- L %d:\n", block)) ;
	    ASSERT (KLU_valid_LU (nk, TRUE, Lbip [block],
		    Lblen [block], LUbx [block])) ;
	    PRINTF (("\n----------------------- U %d:\n", block)) ;
	    ASSERT (KLU_valid_LU (nk, FALSE, Ubip [block],
		    Ublen [block], LUbx [block])) ;

	    /* -------------------------------------------------------------- */
	    /* get statistics */
	    /* -------------------------------------------------------------- */

	    lnz += lnz_block ;
	    unz += unz_block ;
	    max_lnz_block = MAX (max_lnz_block, lnz_block) ;
	    max_unz_block = MAX (max_unz_block, unz_block) ;

	    if (Lnz [block] == EMPTY)
	    {
		/* revise estimate for subsequent factorization */
		Lnz [block] = MAX (lnz_block, unz_block) ;
	    }

	    /* -------------------------------------------------------------- */
	    /* combine the klu row ordering with the symbolic pre-ordering */
	    /* -------------------------------------------------------------- */

	    PRINTF (("Pnum, 1-based:\n")) ;
	    for (k = 0 ; k < nk ; k++)
	    {
		ASSERT (k + k1 < n) ;
		ASSERT (Pblock [k] + k1 < n) ;
		Pnum [k + k1] = P [Pblock [k] + k1] ;
		PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
		    k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
	    }

	    /* the local pivot row permutation Pblock is no longer needed */
	}
    }
    ASSERT (nzoff == Offp [n]) ;
    PRINTF (("\n------------------- Off diagonal entries:\n")) ;
    ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;

    Numeric->lnz = lnz ;
    Numeric->unz = unz ;
    Numeric->max_lnz_block = max_lnz_block ;
    Numeric->max_unz_block = max_unz_block ;

    /* Numeric->flops = EMPTY ;		TODO not yet computed */

    /* compute the inverse of Pnum */
#ifndef NDEBUG
    for (k = 0 ; k < n ; k++)
    {
	Pinv [k] = EMPTY ;
    }
#endif
    for (k = 0 ; k < n ; k++)
    {
	ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
	Pinv [Pnum [k]] = k ;
    }
#ifndef NDEBUG
    for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ;
#endif

    /* permute scale factors Rs according to pivotal row order */
    if (scale > 0)
    {
	for (k = 0 ; k < n ; k++)
	{
	    REAL (X [k]) = Rs [Pnum [k]] ;
	}
	for (k = 0 ; k < n ; k++)
	{
	    Rs [k] = REAL (X [k]) ;
	}
    }

    PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
    ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;

    /* apply the pivot row permutations to the off-diagonal entries */
    for (p = 0 ; p < nzoff ; p++)
    {
	ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
	Offi [p] = Pinv [Offi [p]] ;
    }

    PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
    ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;

#ifndef NDEBUG
    {
	PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
	Entry *singleton = Numeric->Singleton ;
	for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
	{
	    k1 = R [block] ;
	    k2 = R [block+1] ;
	    nk = k2 - k1 ;
	    PRINTF (("\n======================klu_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
	    if (nk == 1)
	    {
		PRINTF (("singleton  ")) ;
		/* ENTRY_PRINT (singleton [block]) ; */
		PRINT_ENTRY (singleton [block]) ;
	    }
	    else
	    {
		int *Lip, *Uip, *Llen, *Ulen ;
		Unit *LU ;
		Lip = Lbip [block] ;
		Llen = Lblen [block] ;
		LU = (Unit *) Numeric->LUbx [block] ;
		PRINTF (("\n---- L block %d\n", block));
		ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
		Uip = Ubip [block] ;
		Ulen = Ublen [block] ;
		PRINTF (("\n---- U block %d\n", block)) ;
		ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
	    }
	}
    }
#endif
}


/* ========================================================================== */
/* === CLEAR_PTR ============================================================ */
/* ========================================================================== */

/* Set an array of pointers to NULL */

#define CLEAR_PTR(Ptr,size) \
{ \
    int ii ; \
    if (Ptr != NULL) \
    { \
	for (ii = 0 ; ii < size ; ii++) \
	{ \
	    Ptr [ii] = NULL ; \
	} \
    } \
}


/* ========================================================================== */
/* === klu_factor =========================================================== */
/* ========================================================================== */

klu_numeric *KLU_factor		/* returns NULL if error, or a valid
				   klu_numeric object if successful */
(
    /* --- inputs --- */
    int Ap [ ],		/* size n+1, column pointers */
    int Ai [ ],		/* size nz, row indices */
    double Ax [ ],
    klu_symbolic *Symbolic,
    /* -------------- */
    klu_common *Common
)
{
    int n, nzoff, nblocks, maxblock, block, k1, k2, nk, ok = TRUE ;
    int *R ;
    int **Lbip, **Ubip, **Lblen, **Ublen ;
    klu_numeric *Numeric ;
    Unit **Udiag ;
    size_t n1, nzoff1, nunits, s, b6, n3, nk1 ;

    if (Common == NULL)
    {
	return (NULL) ;
    }
    Common->status = KLU_OK ;
    Common->numerical_rank = EMPTY ;
    Common->singular_col = EMPTY ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Symbolic object */
    /* ---------------------------------------------------------------------- */

    /* check for a valid Symbolic object */
    if (Symbolic == NULL)
    {
	Common->status = KLU_INVALID ;
	return (NULL) ;
    }

    n = Symbolic->n ;
    nzoff = Symbolic->nzoff ;
    nblocks = Symbolic->nblocks ;
    maxblock = Symbolic->maxblock ;
    R = Symbolic->R ;
    PRINTF (("klu_factor:  n %d nzoff %d nblocks %d maxblock %d\n",
	n, nzoff, nblocks, maxblock)) ;

    /* ---------------------------------------------------------------------- */
    /* get control parameters and make sure they are in the proper range */
    /* ---------------------------------------------------------------------- */

    Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
    Common->initmem = MAX (1.0, Common->initmem) ;
    Common->tol = MIN (Common->tol, 1.0) ;
    Common->tol = MAX (0.0, Common->tol) ;
    Common->growth = MAX (1.0, Common->growth) ;

    /* ---------------------------------------------------------------------- */
    /* allocate the Numeric object  */
    /* ---------------------------------------------------------------------- */

    /* this will not cause size_t overflow (already checked by klu_symbolic) */
    n1 = ((size_t) n) + 1 ;
    nzoff1 = ((size_t) nzoff) + 1 ;

    Numeric = klu_malloc (sizeof (klu_numeric), 1, Common) ;
    if (Common->status < KLU_OK)
    {
	/* out of memory */
	Common->status = KLU_OUT_OF_MEMORY ;
	return (NULL) ;
    }
    Numeric->nblocks = nblocks ;
    Numeric->Pnum = klu_malloc (n, sizeof (int), Common) ;
    Numeric->Offp = klu_malloc (n1, sizeof (int), Common) ;
    Numeric->Offi = klu_malloc (nzoff1, sizeof (int), Common) ;
    Numeric->Offx = klu_malloc (nzoff1, sizeof (Entry), Common) ;
    Numeric->Singleton = klu_malloc (nblocks, sizeof (Entry), Common) ;

    Numeric->Lbip  = klu_malloc (nblocks, sizeof (int *), Common) ;
    Numeric->Ubip  = klu_malloc (nblocks, sizeof (int *), Common) ;
    Numeric->Lblen = klu_malloc (nblocks, sizeof (int *), Common) ;
    Numeric->Ublen = klu_malloc (nblocks, sizeof (int *), Common) ;
    Numeric->LUbx  = klu_malloc (nblocks, sizeof (Unit *), Common) ;
    Numeric->Udiag = klu_malloc (nblocks, sizeof (Unit *), Common) ;

    if (Common->scale > 0)
    {
	Numeric->Rs = klu_malloc (n, sizeof (double), Common) ;
    }
    else
    {
	/* no scaling */
	Numeric->Rs = NULL ;
    }

    Numeric->Pinv = klu_malloc (n, sizeof (int), Common) ;

    /* allocate permanent workspace for factorization and solve.
     * Note that the solver will use an Xwork of size 4n, whereas
     * the factorization codes use an Xwork of size n and integer space
     * (Iwork) of size 6n. klu_condest uses an Xwork of size 2n. */

    s = klu_mult_size_t (n, sizeof (Entry), &ok) ;
    n3 = klu_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
    b6 = klu_mult_size_t (maxblock, 6 * sizeof (int), &ok) ;
    Numeric->worksize = klu_add_size_t (s, MAX (n3, b6), &ok) ;
    if (!ok)
    {
	/* problem too large (almost impossible to happen here) */
	Common->status = KLU_TOO_LARGE ;
	KLU_free_numeric (&Numeric, Common) ;
	return (NULL) ;
    }

/*
    Numeric->worksize = n * sizeof (Entry) +
	MAX (n * 3 *sizeof (Entry), 6*maxblock * sizeof (int)) ;
*/

    Numeric->Work = klu_malloc (Numeric->worksize, 1, Common) ;
    Numeric->Xwork = Numeric->Work ;
    Numeric->Iwork = (int *) ((Entry *) Numeric->Xwork + n) ;

    if (Common->status < KLU_OK)
    {
	/* out of memory */
	KLU_free_numeric (&Numeric, Common) ;
	return (NULL) ;
    }

    /* clear the pointer arrays, so that klu_free_numeric works OK */
    CLEAR_PTR (Numeric->Lbip,  nblocks) ;
    CLEAR_PTR (Numeric->Ubip,  nblocks) ;
    CLEAR_PTR (Numeric->Lblen, nblocks) ;
    CLEAR_PTR (Numeric->Ublen, nblocks) ;
    CLEAR_PTR (Numeric->LUbx,  nblocks) ;
    CLEAR_PTR (Numeric->Udiag, nblocks) ;

    /* allocate the column pointer arrays for each block */
    Lbip = Numeric->Lbip ;
    Ubip = Numeric->Ubip ;
    Lblen = Numeric->Lblen ;
    Ublen = Numeric->Ublen ;
    Udiag = (Unit **) Numeric->Udiag ;

    for (block = 0 ; block < nblocks ; block++)
    {
	k1 = R [block] ;
	k2 = R [block+1] ;
	nk = k2 - k1 ;
	nk1 = ((size_t) nk) + 1 ;   /* cannot overflow */
	if (nk > 1)
	{
	    nunits = UNITS (Entry, nk) ;
	    Lbip [block]  = klu_malloc (nk1, sizeof (int), Common) ;
	    Ubip [block]  = klu_malloc (nk1, sizeof (int), Common) ;
	    Lblen [block] = klu_malloc (nk1, sizeof (int), Common) ;
	    Ublen [block] = klu_malloc (nk1, sizeof (int), Common) ;
	    Udiag [block] = klu_malloc (nunits, sizeof (Unit), Common) ;
	    if (Common->status < KLU_OK)
	    {
		/* out of memory */
	 	KLU_free_numeric (&Numeric, Common) ;
		return (NULL) ;
	    }
	}
    }

    /* ---------------------------------------------------------------------- */
    /* factorize the blocks */
    /* ---------------------------------------------------------------------- */

    klu_factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;

    /* ---------------------------------------------------------------------- */
    /* return or free the Numeric object */
    /* ---------------------------------------------------------------------- */

    if (Common->status < KLU_OK)
    {
	/* out of memory or inputs invalid */
	KLU_free_numeric (&Numeric, Common) ;
    }
    else if (Common->status == KLU_SINGULAR)
    {
	if (Common->halt_if_singular)
	{
	    /* Matrix is singular, and the Numeric object is only partially
	     * defined because we halted early.  This is the default case for
	     * a singular matrix. */
	    KLU_free_numeric (&Numeric, Common) ;
	}
    }
    else if (Common->status == KLU_OK)
    {
	/* successful non-singular factorization */
	Common->numerical_rank = n ;
	Common->singular_col = n ;
    }
    return (Numeric) ;
}


syntax highlighted by Code2HTML, v. 0.9.1