/* ========================================================================== */
/* === Supernodal/t_cholmod_super_solve ===================================== */
/* ========================================================================== */

/* -----------------------------------------------------------------------------
 * CHOLMOD/Supernodal Module.  Copyright (C) 2005-2006, Timothy A. Davis
 * The CHOLMOD/Supernodal Module is licensed under Version 2.0 of the GNU
 * General Public License.  See gpl.txt for a text of the license.
 * CHOLMOD is also available under other licenses; contact authors for details.
 * http://www.cise.ufl.edu/research/sparse
 * -------------------------------------------------------------------------- */

/* Template routine for cholmod_super_solve.  Supports real or complex L. */

#include "cholmod_template.h"

static int TEMPLATE (cholmod_super_lsolve)
(
    /* ---- input ---- */
    cholmod_factor *L,	/* factor to use for the forward solve */
    /* ---- output ---- */
    cholmod_dense *X,	/* b on input, solution to Lx=b on output */
    /* ---- workspace ---- */
    cholmod_dense *E,	/* workspace of size nrhs*(L->maxesize) */
    /* --------------- */
    cholmod_common *Common
)
{
    double *Lx, *Xx, *Ex ;
    double minus_one [2], one [2] ;
    Int *Lpi, *Lpx, *Ls, *Super ;
    Int nsuper, k1, k2, psi, psend, psx, nsrow, nscol, ii, s,
	nsrow2, n, ps2, j, i, d, nrhs ;

    /* If integer overflow occurs in the BLAS, Common->status is set to
     * CHOLMOD_TOO_LARGE in the caller, and the contents of X are undefined. */
    int blas_ok = TRUE ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    nrhs = X->ncol ;
    Ex = E->x ;
    Xx = X->x ;
    n = L->n ;
    d = X->d ;

    nsuper = L->nsuper ;
    Lpi = L->pi ;
    Lpx = L->px ;
    Ls = L->s ;
    Super = L->super ;
    Lx = L->x ;
    minus_one [0] = -1.0 ;
    minus_one [1] = 0 ;
    one [0] = 1.0 ;
    one [1] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* solve Lx=b */
    /* ---------------------------------------------------------------------- */

    if (nrhs == 1)
    {

	for (s = 0 ; s < nsuper ; s++)
	{
	    k1 = Super [s] ;
	    k2 = Super [s+1] ;
	    psi = Lpi [s] ;
	    psend = Lpi [s+1] ;
	    psx = Lpx [s] ;
	    nsrow = psend - psi ;
	    nscol = k2 - k1 ;
	    nsrow2 = nsrow - nscol ;
	    ps2 = psi + nscol ;
	    ASSERT ((size_t) nsrow2 <= L->maxesize) ;

	    /* L1 is nscol-by-nscol, lower triangular with non-unit diagonal.
	     * L2 is nsrow2-by-nscol.  L1 and L2 have leading dimension of
	     * nsrow.  x1 is nscol-by-nsrow, with leading dimension n.
	     * E is nsrow2-by-1, with leading dimension nsrow2.
	     */

	    /* gather X into E */
	    for (ii = 0 ; ii < nsrow2 ; ii++)
	    {
		/* Ex [ii] = Xx [Ls [ps2 + ii]] ; */
		ASSIGN (Ex,-,ii, Xx,-,Ls [ps2 + ii]) ;
	    }

#ifdef REAL

	    /* solve L1*x1 (that is, x1 = L1\x1) */
	    BLAS_dtrsv ("L", "N", "N",
		nscol,			    /* N:       L1 is nscol-by-nscol */
		Lx + ENTRY_SIZE*psx, nsrow, /* A, LDA:  L1 */
		Xx + ENTRY_SIZE*k1, 1) ;    /* X, INCX: x1 */

	    /* E = E - L2*x1 */
	    BLAS_dgemv ("N",
		nsrow2, nscol,		    /* M, N:    L2 is nsrow2-by-nscol */
		minus_one,		    /* ALPHA:   -1 */
		Lx + ENTRY_SIZE*(psx + nscol),   /* A, LDA:  L2 */
		nsrow,
		Xx + ENTRY_SIZE*k1, 1,	    /* X, INCX: x1 */
		one,			    /* BETA:    1 */
		Ex, 1) ;		    /* Y, INCY: E */

#else

	    /* solve L1*x1 (that is, x1 = L1\x1) */
	    BLAS_ztrsv ("L", "N", "N",
		nscol,			    /* N:       L1 is nscol-by-nscol */
		Lx + ENTRY_SIZE*psx, nsrow, /* A, LDA:  L1 */
		Xx + ENTRY_SIZE*k1, 1) ;    /* X, INCX: x1 */

	    /* E = E - L2*x1 */
	    BLAS_zgemv ("N",
		nsrow2, nscol,		    /* M, N:    L2 is nsrow2-by-nscol */
		minus_one,		    /* ALPHA:   -1 */
		Lx + ENTRY_SIZE*(psx + nscol),   /* A, LDA:  L2 */
		nsrow,
		Xx + ENTRY_SIZE*k1, 1,	    /* X, INCX: x1 */
		one,			    /* BETA:    1 */
		Ex, 1) ;		    /* Y, INCY: E */

#endif

	    /* scatter E back into X */
	    for (ii = 0 ; ii < nsrow2 ; ii++)
	    {
		/* Xx [Ls [ps2 + ii]] = Ex [ii] ; */
		ASSIGN (Xx,-,Ls [ps2 + ii], Ex,-,ii) ;
	    }
	}
    }
    else
    {

	for (s = 0 ; s < nsuper ; s++)
	{
	    k1 = Super [s] ;
	    k2 = Super [s+1] ;
	    psi = Lpi [s] ;
	    psend = Lpi [s+1] ;
	    psx = Lpx [s] ;
	    nsrow = psend - psi ;
	    nscol = k2 - k1 ;
	    nsrow2 = nsrow - nscol ;
	    ps2 = psi + nscol ;
	    ASSERT ((size_t) nsrow2 <= L->maxesize) ;

	    /* E is nsrow2-by-nrhs, with leading dimension nsrow2. */

	    /* gather X into E */
	    for (ii = 0 ; ii < nsrow2 ; ii++)
	    {
		i = Ls [ps2 + ii] ;
		for (j = 0 ; j < nrhs ; j++)
		{
		    /* Ex [ii + j*nsrow2] = Xx [i + j*d] ; */
		    ASSIGN (Ex,-,ii+j*nsrow2, Xx,-,i+j*d) ;
		}
	    }

#ifdef REAL

	    /* solve L1*x1 */
	    BLAS_dtrsm ("L", "L", "N", "N",
		nscol, nrhs,			/* M, N: x1 is nscol-by-nrhs */
		one,				/* ALPHA:  1 */
		Lx + ENTRY_SIZE*psx, nsrow,	/* A, LDA: L1 */
		Xx + ENTRY_SIZE*k1, d) ;	/* B, LDB: x1 */

	    /* E = E - L2*x1 */
	    if (nsrow2 > 0)
	    {
		BLAS_dgemm ("N", "N",
		    nsrow2, nrhs, nscol,	    /* M, N, K */
		    minus_one,			    /* ALPHA:  -1 */
		    Lx + ENTRY_SIZE*(psx + nscol),  /* A, LDA: L2 */
		    nsrow,
		    Xx + ENTRY_SIZE*k1, d,	    /* B, LDB: X1 */
		    one,			    /* BETA:   1 */
		    Ex, nsrow2) ;		    /* C, LDC: E */
	    }

#else

	    /* solve L1*x1 */
	    BLAS_ztrsm ("L", "L", "N", "N",
		nscol, nrhs,			/* M, N: x1 is nscol-by-nrhs */
		one,				/* ALPHA:  1 */
		Lx + ENTRY_SIZE*psx, nsrow,	/* A, LDA: L1 */
		Xx + ENTRY_SIZE*k1, d) ;	/* B, LDB: x1 */

	    /* E = E - L2*x1 */
	    if (nsrow2 > 0)
	    {
		BLAS_zgemm ("N", "N",
		    nsrow2, nrhs, nscol,	    /* M, N, K */
		    minus_one,			    /* ALPHA:  -1 */
		    Lx + ENTRY_SIZE*(psx + nscol),  /* A, LDA: L2 */
		    nsrow,
		    Xx + ENTRY_SIZE*k1, d,	    /* B, LDB: X1 */
		    one,			    /* BETA:   1 */
		    Ex, nsrow2) ;		    /* C, LDC: E */
	    }

#endif

	    /* scatter E back into X */
	    for (ii = 0 ; ii < nsrow2 ; ii++)
	    {
		i = Ls [ps2 + ii] ;
		for (j = 0 ; j < nrhs ; j++)
		{
		    /* Xx [i + j*d] = Ex [ii + j*nsrow2] ; */
		    ASSIGN (Xx,-,i+j*d, Ex,-,ii+j*nsrow2) ;
		}
	    }
	}
    }

    Common->status = CHOLMOD_OK ;
    return (blas_ok) ;
}


static int TEMPLATE (cholmod_super_ltsolve)
(
    /* ---- input ---- */
    cholmod_factor *L,	/* factor to use for the forward solve */
    /* ---- output ---- */
    cholmod_dense *X,	/* b on input, solution to Lx=b on output */
    /* ---- workspace ---- */
    cholmod_dense *E,	/* workspace of size nrhs*(L->maxesize) */
    /* --------------- */
    cholmod_common *Common
)
{
    double *Lx, *Xx, *Ex ;
    double minus_one [2], one [2] ;
    Int *Lpi, *Lpx, *Ls, *Super ;
    Int nsuper, k1, k2, psi, psend, psx, nsrow, nscol, ii, s,
	nsrow2, n, ps2, j, i, d, nrhs ;
    int blas_ok = TRUE ;

    /* ---------------------------------------------------------------------- */
    /* get inputs */
    /* ---------------------------------------------------------------------- */

    nrhs = X->ncol ;
    Ex = E->x ;
    Xx = X->x ;
    n = L->n ;
    d = X->d ;

    nsuper = L->nsuper ;
    Lpi = L->pi ;
    Lpx = L->px ;
    Ls = L->s ;
    Super = L->super ;
    Lx = L->x ;
    minus_one [0] = -1.0 ;
    minus_one [1] = 0 ;
    one [0] = 1.0 ;
    one [1] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* solve L'x=b */
    /* ---------------------------------------------------------------------- */

    if (nrhs == 1)
    {

	for (s = nsuper-1 ; s >= 0 ; s--)
	{
	    k1 = Super [s] ;
	    k2 = Super [s+1] ;
	    psi = Lpi [s] ;
	    psend = Lpi [s+1] ;
	    psx = Lpx [s] ;
	    nsrow = psend - psi ;
	    nscol = k2 - k1 ;
	    nsrow2 = nsrow - nscol ;
	    ps2 = psi + nscol ;
	    ASSERT ((size_t) nsrow2 <= L->maxesize) ;

	    /* L1 is nscol-by-nscol, lower triangular with non-unit diagonal.
	     * L2 is nsrow2-by-nscol.  L1 and L2 have leading dimension of
	     * nsrow.  x1 is nscol-by-nsrow, with leading dimension n.
	     * E is nsrow2-by-1, with leading dimension nsrow2.
	     */

	    /* gather X into E */
	    for (ii = 0 ; ii < nsrow2 ; ii++)
	    {
		/* Ex [ii] = Xx [Ls [ps2 + ii]] ; */
		ASSIGN (Ex,-,ii, Xx,-,Ls [ps2 + ii]) ;
	    }

#ifdef REAL

	    /* x1 = x1 - L2'*E */
	    BLAS_dgemv ("C",
		nsrow2, nscol,		    /* M, N: L2 is nsrow2-by-nscol */
		minus_one,		    /* ALPHA:   -1 */
		Lx + ENTRY_SIZE*(psx + nscol),   /* A, LDA:  L2 */
		nsrow,
		Ex, 1,			    /* X, INCX: Ex */
		one,			    /* BETA:    1 */
		Xx + ENTRY_SIZE*k1, 1) ;    /* Y, INCY: x1 */

	    /* solve L1'*x1 */
	    BLAS_dtrsv ("L", "C", "N",
		nscol,			    /* N:	L1 is nscol-by-nscol */
		Lx + ENTRY_SIZE*psx, nsrow,	    /* A, LDA:  L1 */
		Xx + ENTRY_SIZE*k1, 1) ;	    /* X, INCX: x1 */

#else

	    /* x1 = x1 - L2'*E */
	    BLAS_zgemv ("C",
		nsrow2, nscol,		    /* M, N: L2 is nsrow2-by-nscol */
		minus_one,		    /* ALPHA:   -1 */
		Lx + ENTRY_SIZE*(psx + nscol),   /* A, LDA:  L2 */
		nsrow,
		Ex, 1,			    /* X, INCX: Ex */
		one,			    /* BETA:    1 */
		Xx + ENTRY_SIZE*k1, 1) ;    /* Y, INCY: x1 */

	    /* solve L1'*x1 */
	    BLAS_ztrsv ("L", "C", "N",
		nscol,			    /* N:	L1 is nscol-by-nscol */
		Lx + ENTRY_SIZE*psx, nsrow,	    /* A, LDA:  L1 */
		Xx + ENTRY_SIZE*k1, 1) ;	    /* X, INCX: x1 */

#endif

	}
    }
    else
    {

	for (s = nsuper-1 ; s >= 0 ; s--)
	{
	    k1 = Super [s] ;
	    k2 = Super [s+1] ;
	    psi = Lpi [s] ;
	    psend = Lpi [s+1] ;
	    psx = Lpx [s] ;
	    nsrow = psend - psi ;
	    nscol = k2 - k1 ;
	    nsrow2 = nsrow - nscol ;
	    ps2 = psi + nscol ;
	    ASSERT ((size_t) nsrow2 <= L->maxesize) ;

	    /* E is nsrow2-by-nrhs, with leading dimension nsrow2. */

	    /* gather X into E */
	    for (ii = 0 ; ii < nsrow2 ; ii++)
	    {
		i = Ls [ps2 + ii] ;
		for (j = 0 ; j < nrhs ; j++)
		{
		    /* Ex [ii + j*nsrow2] = Xx [i + j*d] ; */
		    ASSIGN (Ex,-,ii+j*nsrow2, Xx,-,i+j*d) ;
		}
	    }

#ifdef REAL

	    /* x1 = x1 - L2'*E */
	    if (nsrow2 > 0)
	    {
		BLAS_dgemm ("C", "N",
		    nscol, nrhs, nsrow2,	/* M, N, K */
		    minus_one,			/* ALPHA:  -1 */
		    Lx + ENTRY_SIZE*(psx + nscol),  /* A, LDA: L2 */
		    nsrow,
		    Ex, nsrow2,			/* B, LDB: E */
		    one,			/* BETA:   1 */
		    Xx + ENTRY_SIZE*k1, d) ;	/* C, LDC: x1 */
	    }

	    /* solve L1'*x1 */
	    BLAS_dtrsm ("L", "L", "C", "N",
		nscol,	nrhs,			/* M, N: x1 is nscol-by-nrhs */
		one,				/* ALPHA:  1 */
		Lx + ENTRY_SIZE*psx, nsrow,	/* A, LDA: L1 */
		Xx + ENTRY_SIZE*k1, d) ;	/* B, LDB: x1 */

#else

	    /* x1 = x1 - L2'*E */
	    if (nsrow2 > 0)
	    {
		BLAS_zgemm ("C", "N",
		    nscol, nrhs, nsrow2,	/* M, N, K */
		    minus_one,			/* ALPHA:  -1 */
		    Lx + ENTRY_SIZE*(psx + nscol),  /* A, LDA: L2 */
		    nsrow,
		    Ex, nsrow2,			/* B, LDB: E */
		    one,			/* BETA:   1 */
		    Xx + ENTRY_SIZE*k1, d) ;	/* C, LDC: x1 */
	    }

	    /* solve L1'*x1 */
	    BLAS_ztrsm ("L", "L", "C", "N",
		nscol,	nrhs,			/* M, N: x1 is nscol-by-nrhs */
		one,				/* ALPHA:  1 */
		Lx + ENTRY_SIZE*psx, nsrow,	/* A, LDA: L1 */
		Xx + ENTRY_SIZE*k1, d) ;	/* B, LDB: x1 */

#endif

	}
    }

    Common->status = CHOLMOD_OK ;
    return (blas_ok) ;
}

#undef PATTERN
#undef REAL
#undef COMPLEX
#undef ZOMPLEX


syntax highlighted by Code2HTML, v. 0.9.1