/* ========================================================================== */
/* === Modify/t_cholmod_updown_numkr ======================================== */
/* ========================================================================== */
/* -----------------------------------------------------------------------------
* CHOLMOD/Modify Module. Copyright (C) 2005-2006,
* Timothy A. Davis and William W. Hager.
* The CHOLMOD/Modify Module is licensed under Version 2.0 of the GNU
* General Public License. See gpl.txt for a text of the license.
* CHOLMOD is also available under other licenses; contact authors for details.
* http://www.cise.ufl.edu/research/sparse
* -------------------------------------------------------------------------- */
/* Supernodal numerical update/downdate of rank K = RANK, along a single path.
* This routine operates on a simplicial factor, but operates on adjacent
* columns of L that would fit within a single supernode. "Adjacent" means
* along a single path in the elimination tree; they may or may not be
* adjacent in the matrix L.
*
* external defines: NUMERIC, WDIM, RANK.
*
* WDIM is 1, 2, 4, or 8. RANK can be 1 to WDIM.
*
* A simple method is included (#define SIMPLE). The code works, but is slow.
* It is meant only to illustrate what this routine is doing.
*
* A rank-K update proceeds along a single path, using single-column, dual-
* column, or quad-column updates of L. If a column j and the next column
* in the path (its parent) do not have the same nonzero pattern, a single-
* column update is used. If they do, but the 3rd and 4th column from j do
* not have the same pattern, a dual-column update is used, in which the two
* columns are treated as if they were a single supernode of two columns. If
* there are 4 columns in the path that all have the same nonzero pattern, then
* a quad-column update is used. All three kinds of updates can be used along
* a single path, in a single call to this function.
*
* Single-column update:
*
* When updating a single column of L, each iteration of the for loop,
* below, processes four rows of W (all columns involved) and one column
* of L. Suppose we have a rank-5 update, and columns 2 through 6 of W
* are involved. In this case, W in this routine is a pointer to column
* 2 of the matrix W in the caller. W (in the caller, shown as 'W') is
* held in row-major order, and is 8-by-n (a dense matrix storage format),
* but shown below in column form to match the column of L. Suppose there
* are 13 nonzero entries in column 27 of L, with row indices 27 (the
* diagonal, D), 28, 30, 31, 42, 43, 44, 50, 51, 67, 81, 83, and 84. This
* pattern is held in Li [Lp [27] ... Lp [27 + Lnz [27] - 1], where
* Lnz [27] = 13. The modification of the current column j of L is done
* in the following order. A dot (.) means the entry of W is not accessed.
*
* W0 points to row 27 of W, and G is a 1-by-8 temporary vector.
*
* G[0] G[4]
* G x x x x x . . .
*
* W0
* |
* v
* 27 . . x x x x x . W0 points to W (27,2)
*
*
* row 'W' W column j = 27
* | | | of L
* v v v |
* first iteration of for loop: v
*
* 28 . . 1 5 9 13 17 . x
* 30 . . 2 6 10 14 18 . x
* 31 . . 3 7 11 15 19 . x
* 42 . . 4 8 12 16 20 . x
*
* second iteration of for loop:
*
* 43 . . 1 5 9 13 17 . x
* 44 . . 2 6 10 14 18 . x
* 50 . . 3 7 11 15 19 . x
* 51 . . 4 8 12 16 20 . x
*
* third iteration of for loop:
*
* 67 . . 1 5 9 13 17 . x
* 81 . . 2 6 10 14 18 . x
* 83 . . 3 7 11 15 19 . x
* 84 . . 4 8 12 16 20 . x
*
* If the number of offdiagonal nonzeros in column j of L is not divisible
* by 4, then the switch-statement does the work for the first nz % 4 rows.
*
* Dual-column update:
*
* In this case, two columns of L that are adjacent in the path are being
* updated, by 1 to 8 columns of W. Suppose columns j=27 and j=28 are
* adjacent columns in the path (they need not be j and j+1). Two rows
* of G and W are used as coefficients during the update: (G0, G1) and
* (W0, W1).
*
* G0 x x x x x . . .
* G1 x x x x x . . .
*
* 27 . . x x x x x . W0 points to W (27,2)
* 28 . . x x x x x . W1 points to W (28,2)
*
*
* row 'W' W0,W1 column j = 27
* | | | of L
* v v v |
* | |-- column j = 28 of L
* v v
* update L (j1,j):
*
* 28 . . 1 2 3 4 5 . x - ("-" is not stored in L)
*
* cleanup iteration since length is odd:
*
* 30 . . 1 2 3 4 5 . x x
*
* then each iteration does two rows of both columns of L:
*
* 31 . . 1 3 5 7 9 . x x
* 42 . . 2 4 6 8 10 . x x
*
* 43 . . 1 3 5 7 9 . x x
* 44 . . 2 4 6 8 10 . x x
*
* 50 . . 1 3 5 7 9 . x x
* 51 . . 2 4 6 8 10 . x x
*
* 67 . . 1 3 5 7 9 . x x
* 81 . . 2 4 6 8 10 . x x
*
* 83 . . 1 3 5 7 9 . x x
* 84 . . 2 4 6 8 10 . x x
*
* If the number of offdiagonal nonzeros in column j of L is not even,
* then the cleanup iteration does the work for the first row.
*
* Quad-column update:
*
* In this case, four columns of L that are adjacent in the path are being
* updated, by 1 to 8 columns of W. Suppose columns j=27, 28, 30, and 31
* are adjacent columns in the path (they need not be j, j+1, ...). Four
* rows of G and W are used as coefficients during the update: (G0 through
* G3) and (W0 through W3). j=27, j1=28, j2=30, and j3=31.
*
* G0 x x x x x . . .
* G1 x x x x x . . .
* G3 x x x x x . . .
* G4 x x x x x . . .
*
* 27 . . x x x x x . W0 points to W (27,2)
* 28 . . x x x x x . W1 points to W (28,2)
* 30 . . x x x x x . W2 points to W (30,2)
* 31 . . x x x x x . W3 points to W (31,2)
*
*
* row 'W' W0,W1,.. column j = 27
* | | | of L
* v v v |
* | |-- column j = 28 of L
* | | |-- column j = 30 of L
* | | | |-- column j = 31 of L
* v v v v
* update L (j1,j):
* 28 . . 1 2 3 4 5 . x - - -
*
* update L (j2,j):
* 30 . . 1 2 3 4 5 . # x - - (# denotes modified)
*
* update L (j2,j1)
* 30 . . 1 2 3 4 5 . x # - -
*
* update L (j3,j)
* 31 . . 1 2 3 4 5 . # x x -
*
* update L (j3,j1)
* 31 . . 1 2 3 4 5 . x # x -
*
* update L (j3,j2)
* 31 . . 1 2 3 4 5 . x x # -
*
* cleanup iteration since length is odd:
* 42 . . 1 2 3 4 5 . x x x x
*
*
* ----- CHOLMOD v1.1.1 did the following --------------------------------------
* then each iteration does two rows of all four colummns of L:
*
* 43 . . 1 3 5 7 9 . x x x x
* 44 . . 2 4 6 8 10 . x x x x
*
* 50 . . 1 3 5 7 9 . x x x x
* 51 . . 2 4 6 8 10 . x x x x
*
* 67 . . 1 3 5 7 9 . x x x x
* 81 . . 2 4 6 8 10 . x x x x
*
* 83 . . 1 3 5 7 9 . x x x x
* 84 . . 2 4 6 8 10 . x x x x
*
* ----- CHOLMOD v1.2.0 does the following -------------------------------------
* then each iteration does one rows of all four colummns of L:
*
* 43 . . 1 2 3 4 5 . x x x x
* 44 . . 1 2 3 4 5 . x x x x
* 50 . . 1 3 5 4 5 . x x x x
* 51 . . 1 2 3 4 5 . x x x x
* 67 . . 1 3 5 4 5 . x x x x
* 81 . . 1 2 3 4 5 . x x x x
* 83 . . 1 3 5 4 5 . x x x x
* 84 . . 1 2 3 4 5 . x x x x
*
* This file is included in t_cholmod_updown.c, only.
* It is not compiled separately. It contains no user-callable routines.
*
* workspace: Xwork (WDIM*nrow)
*/
/* ========================================================================== */
/* === loop unrolling macros ================================================ */
/* ========================================================================== */
#undef RANK1
#undef RANK2
#undef RANK3
#undef RANK4
#undef RANK5
#undef RANK6
#undef RANK7
#undef RANK8
#define RANK1(statement) statement
#if RANK < 2
#define RANK2(statement)
#else
#define RANK2(statement) statement
#endif
#if RANK < 3
#define RANK3(statement)
#else
#define RANK3(statement) statement
#endif
#if RANK < 4
#define RANK4(statement)
#else
#define RANK4(statement) statement
#endif
#if RANK < 5
#define RANK5(statement)
#else
#define RANK5(statement) statement
#endif
#if RANK < 6
#define RANK6(statement)
#else
#define RANK6(statement) statement
#endif
#if RANK < 7
#define RANK7(statement)
#else
#define RANK7(statement) statement
#endif
#if RANK < 8
#define RANK8(statement)
#else
#define RANK8(statement) statement
#endif
#define FOR_ALL_K \
RANK1 (DO (0)) \
RANK2 (DO (1)) \
RANK3 (DO (2)) \
RANK4 (DO (3)) \
RANK5 (DO (4)) \
RANK6 (DO (5)) \
RANK7 (DO (6)) \
RANK8 (DO (7))
/* ========================================================================== */
/* === alpha/gamma ========================================================== */
/* ========================================================================== */
#undef ALPHA_GAMMA
#define ALPHA_GAMMA(Dj,Alpha,Gamma,W) \
{ \
double dj = Dj ; \
if (update) \
{ \
for (k = 0 ; k < RANK ; k++) \
{ \
double w = W [k] ; \
double alpha = Alpha [k] ; \
double a = alpha + (w * w) / dj ; \
dj *= a ; \
Alpha [k] = a ; \
Gamma [k] = (- w / dj) ; \
dj /= alpha ; \
} \
} \
else \
{ \
for (k = 0 ; k < RANK ; k++) \
{ \
double w = W [k] ; \
double alpha = Alpha [k] ; \
double a = alpha - (w * w) / dj ; \
dj *= a ; \
Alpha [k] = a ; \
Gamma [k] = w / dj ; \
dj /= alpha ; \
} \
} \
Dj = ((use_dbound) ? (CHOLMOD(dbound) (dj, Common)) : (dj)) ; \
}
/* ========================================================================== */
/* === numeric update/downdate along one path =============================== */
/* ========================================================================== */
static void NUMERIC (WDIM, RANK)
(
int update, /* TRUE for update, FALSE for downdate */
Int j, /* first column in the path */
Int e, /* last column in the path */
double Alpha [ ], /* alpha, for each column of W */
double W [ ], /* W is an n-by-WDIM array, stored in row-major order */
cholmod_factor *L, /* with unit diagonal (diagonal not stored) */
cholmod_common *Common
)
{
#ifdef SIMPLE
#define w(row,col) W [WDIM*(row) + (col)]
/* ---------------------------------------------------------------------- */
/* concise but slow version for illustration only */
/* ---------------------------------------------------------------------- */
double Gamma [WDIM] ;
double *Lx ;
Int *Li, *Lp, *Lnz ;
Int p, k ;
Int use_dbound = IS_GT_ZERO (Common->dbound) ;
Li = L->i ;
Lx = L->x ;
Lp = L->p ;
Lnz = L->nz ;
/* walk up the etree from node j to its ancestor e */
for ( ; j <= e ; j = (Lnz [j] > 1) ? (Li [Lp [j] + 1]) : Int_max)
{
/* update the diagonal entry D (j,j) with each column of W */
ALPHA_GAMMA (Lx [Lp [j]], Alpha, Gamma, (&(w (j,0)))) ;
/* update column j of L */
for (p = Lp [j] + 1 ; p < Lp [j] + Lnz [j] ; p++)
{
/* update row Li [p] of column j of L with each column of W */
Int i = Li [p] ;
for (k = 0 ; k < RANK ; k++)
{
w (i,k) -= w (j,k) * Lx [p] ;
Lx [p] -= Gamma [k] * w (i,k) ;
}
}
/* clear workspace W */
for (k = 0 ; k < RANK ; k++)
{
w (j,k) = 0 ;
}
}
#else
/* ---------------------------------------------------------------------- */
/* dynamic supernodal version: supernodes detected dynamically */
/* ---------------------------------------------------------------------- */
double G0 [RANK], G1 [RANK], G2 [RANK], G3 [RANK] ;
double Z0 [RANK], Z1 [RANK], Z2 [RANK], Z3 [RANK] ;
double *W0, *W1, *W2, *W3, *Lx ;
Int *Li, *Lp, *Lnz ;
Int j1, j2, j3, p0, p1, p2, p3, parent, lnz, pend, k ;
Int use_dbound = IS_GT_ZERO (Common->dbound) ;
Li = L->i ;
Lx = L->x ;
Lp = L->p ;
Lnz = L->nz ;
/* walk up the etree from node j to its ancestor e */
for ( ; j <= e ; j = parent)
{
p0 = Lp [j] ; /* col j is Li,Lx [p0 ... p0+lnz-1] */
lnz = Lnz [j] ;
W0 = W + WDIM * j ; /* pointer to row j of W */
pend = p0 + lnz ;
/* for k = 0 to RANK-1 do: */
#define DO(k) Z0 [k] = W0 [k] ;
FOR_ALL_K
#undef DO
/* for k = 0 to RANK-1 do: */
#define DO(k) W0 [k] = 0 ;
FOR_ALL_K
#undef DO
/* update D (j,j) */
ALPHA_GAMMA (Lx [p0], Alpha, G0, Z0) ;
p0++ ;
/* determine how many columns of L to update at the same time */
parent = (lnz > 1) ? (Li [p0]) : Int_max ;
if (parent <= e && lnz == Lnz [parent] + 1)
{
/* -------------------------------------------------------------- */
/* node j and its parent j1 can be updated at the same time */
/* -------------------------------------------------------------- */
j1 = parent ;
j2 = (lnz > 2) ? (Li [p0+1]) : Int_max ;
j3 = (lnz > 3) ? (Li [p0+2]) : Int_max ;
W1 = W + WDIM * j1 ; /* pointer to row j1 of W */
p1 = Lp [j1] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) Z1 [k] = W1 [k] ;
FOR_ALL_K
#undef DO
/* for k = 0 to RANK-1 do: */
#define DO(k) W1 [k] = 0 ;
FOR_ALL_K
#undef DO
/* update L (j1,j) */
{
double lx = Lx [p0] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
Z1 [k] -= Z0 [k] * lx ; \
lx -= G0 [k] * Z1 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx ;
}
/* update D (j1,j1) */
ALPHA_GAMMA (Lx [p1], Alpha, G1, Z1) ;
p1++ ;
/* -------------------------------------------------------------- */
/* update 2 or 4 columns of L */
/* -------------------------------------------------------------- */
if ((j2 <= e) && /* j2 in the current path */
(j3 <= e) && /* j3 in the current path */
(lnz == Lnz [j2] + 2) && /* column j2 matches */
(lnz == Lnz [j3] + 3)) /* column j3 matches */
{
/* ---------------------------------------------------------- */
/* update 4 columns of L */
/* ---------------------------------------------------------- */
/* p0 and p1 currently point to row j2 in cols j and j1 of L */
parent = (lnz > 4) ? (Li [p0+2]) : Int_max ;
W2 = W + WDIM * j2 ; /* pointer to row j2 of W */
W3 = W + WDIM * j3 ; /* pointer to row j3 of W */
p2 = Lp [j2] ;
p3 = Lp [j3] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) Z2 [k] = W2 [k] ;
FOR_ALL_K
#undef DO
/* for k = 0 to RANK-1 do: */
#define DO(k) Z3 [k] = W3 [k] ;
FOR_ALL_K
#undef DO
/* for k = 0 to RANK-1 do: */
#define DO(k) W2 [k] = 0 ;
FOR_ALL_K
#undef DO
/* for k = 0 to RANK-1 do: */
#define DO(k) W3 [k] = 0 ;
FOR_ALL_K
#undef DO
/* update L (j2,j) and update L (j2,j1) */
{
double lx [2] ;
lx [0] = Lx [p0] ;
lx [1] = Lx [p1] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
Z2 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * Z2 [k] ; \
Z2 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * Z2 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx [0] ;
Lx [p1++] = lx [1] ;
}
/* update D (j2,j2) */
ALPHA_GAMMA (Lx [p2], Alpha, G2, Z2) ;
p2++ ;
/* update L (j3,j), L (j3,j1), and L (j3,j2) */
{
double lx [3] ;
lx [0] = Lx [p0] ;
lx [1] = Lx [p1] ;
lx [2] = Lx [p2] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
Z3 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * Z3 [k] ; \
Z3 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * Z3 [k] ; \
Z3 [k] -= Z2 [k] * lx [2] ; lx [2] -= G2 [k] * Z3 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx [0] ;
Lx [p1++] = lx [1] ;
Lx [p2++] = lx [2] ;
}
/* update D (j3,j3) */
ALPHA_GAMMA (Lx [p3], Alpha, G3, Z3) ;
p3++ ;
/* each iteration updates L (i, [j j1 j2 j3]) */
for ( ; p0 < pend ; p0++, p1++, p2++, p3++)
{
double lx [4], *w0 ;
lx [0] = Lx [p0] ;
lx [1] = Lx [p1] ;
lx [2] = Lx [p2] ;
lx [3] = Lx [p3] ;
w0 = W + WDIM * Li [p0] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w0 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * w0 [k] ; \
w0 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * w0 [k] ; \
w0 [k] -= Z2 [k] * lx [2] ; lx [2] -= G2 [k] * w0 [k] ; \
w0 [k] -= Z3 [k] * lx [3] ; lx [3] -= G3 [k] * w0 [k] ;
FOR_ALL_K
#undef DO
Lx [p0] = lx [0] ;
Lx [p1] = lx [1] ;
Lx [p2] = lx [2] ;
Lx [p3] = lx [3] ;
}
}
else
{
/* ---------------------------------------------------------- */
/* update 2 columns of L */
/* ---------------------------------------------------------- */
parent = j2 ;
/* cleanup iteration if length is odd */
if ((lnz - 2) % 2)
{
double lx [2] , *w0 ;
lx [0] = Lx [p0] ;
lx [1] = Lx [p1] ;
w0 = W + WDIM * Li [p0] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w0 [k] -= Z0 [k] * lx [0] ; lx [0] -= G0 [k] * w0 [k] ; \
w0 [k] -= Z1 [k] * lx [1] ; lx [1] -= G1 [k] * w0 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx [0] ;
Lx [p1++] = lx [1] ;
}
for ( ; p0 < pend ; p0 += 2, p1 += 2)
{
double lx [2][2], w [2], *w0, *w1 ;
lx [0][0] = Lx [p0 ] ;
lx [1][0] = Lx [p0+1] ;
lx [0][1] = Lx [p1 ] ;
lx [1][1] = Lx [p1+1] ;
w0 = W + WDIM * Li [p0 ] ;
w1 = W + WDIM * Li [p0+1] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w [0] = w0 [k] - Z0 [k] * lx [0][0] ; \
w [1] = w1 [k] - Z0 [k] * lx [1][0] ; \
lx [0][0] -= G0 [k] * w [0] ; \
lx [1][0] -= G0 [k] * w [1] ; \
w0 [k] = w [0] -= Z1 [k] * lx [0][1] ; \
w1 [k] = w [1] -= Z1 [k] * lx [1][1] ; \
lx [0][1] -= G1 [k] * w [0] ; \
lx [1][1] -= G1 [k] * w [1] ;
FOR_ALL_K
#undef DO
Lx [p0 ] = lx [0][0] ;
Lx [p0+1] = lx [1][0] ;
Lx [p1 ] = lx [0][1] ;
Lx [p1+1] = lx [1][1] ;
}
}
}
else
{
/* -------------------------------------------------------------- */
/* update one column of L */
/* -------------------------------------------------------------- */
/* cleanup iteration if length is not a multiple of 4 */
switch ((lnz - 1) % 4)
{
case 1:
{
double lx , *w0 ;
lx = Lx [p0] ;
w0 = W + WDIM * Li [p0] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w0 [k] -= Z0 [k] * lx ; lx -= G0 [k] * w0 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx ;
}
break ;
case 2:
{
double lx [2], *w0, *w1 ;
lx [0] = Lx [p0 ] ;
lx [1] = Lx [p0+1] ;
w0 = W + WDIM * Li [p0 ] ;
w1 = W + WDIM * Li [p0+1] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w0 [k] -= Z0 [k] * lx [0] ; \
w1 [k] -= Z0 [k] * lx [1] ; \
lx [0] -= G0 [k] * w0 [k] ; \
lx [1] -= G0 [k] * w1 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx [0] ;
Lx [p0++] = lx [1] ;
}
break ;
case 3:
{
double lx [3], *w0, *w1, *w2 ;
lx [0] = Lx [p0 ] ;
lx [1] = Lx [p0+1] ;
lx [2] = Lx [p0+2] ;
w0 = W + WDIM * Li [p0 ] ;
w1 = W + WDIM * Li [p0+1] ;
w2 = W + WDIM * Li [p0+2] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w0 [k] -= Z0 [k] * lx [0] ; \
w1 [k] -= Z0 [k] * lx [1] ; \
w2 [k] -= Z0 [k] * lx [2] ; \
lx [0] -= G0 [k] * w0 [k] ; \
lx [1] -= G0 [k] * w1 [k] ; \
lx [2] -= G0 [k] * w2 [k] ;
FOR_ALL_K
#undef DO
Lx [p0++] = lx [0] ;
Lx [p0++] = lx [1] ;
Lx [p0++] = lx [2] ;
}
}
for ( ; p0 < pend ; p0 += 4)
{
double lx [4], *w0, *w1, *w2, *w3 ;
lx [0] = Lx [p0 ] ;
lx [1] = Lx [p0+1] ;
lx [2] = Lx [p0+2] ;
lx [3] = Lx [p0+3] ;
w0 = W + WDIM * Li [p0 ] ;
w1 = W + WDIM * Li [p0+1] ;
w2 = W + WDIM * Li [p0+2] ;
w3 = W + WDIM * Li [p0+3] ;
/* for k = 0 to RANK-1 do: */
#define DO(k) \
w0 [k] -= Z0 [k] * lx [0] ; \
w1 [k] -= Z0 [k] * lx [1] ; \
w2 [k] -= Z0 [k] * lx [2] ; \
w3 [k] -= Z0 [k] * lx [3] ; \
lx [0] -= G0 [k] * w0 [k] ; \
lx [1] -= G0 [k] * w1 [k] ; \
lx [2] -= G0 [k] * w2 [k] ; \
lx [3] -= G0 [k] * w3 [k] ;
FOR_ALL_K
#undef DO
Lx [p0 ] = lx [0] ;
Lx [p0+1] = lx [1] ;
Lx [p0+2] = lx [2] ;
Lx [p0+3] = lx [3] ;
}
}
}
#endif
}
/* prepare this file for another inclusion in t_cholmod_updown.c: */
#undef RANK
syntax highlighted by Code2HTML, v. 0.9.1