/*
* -- SuperLU MT routine (version 1.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* August 15, 1997
*
*/
#include <stdlib.h>
#include <stdio.h>
#include "pdsp_defs.h"
#define XPAND_HINT(memtype, new_next, jcol, param) {\
fprintf(stderr, "Storage for %12s exceeded; Current column %d; Need at least %d;\n",\
memtype, jcol, new_next); \
fprintf(stderr, "You may set it by the %d-th parameter in routine sp_ienv().\n", param); \
ABORT("Memory allocation failed"); \
}
/*
* Set up pointers for integer working arrays.
*/
void
pxgstrf_SetIWork(int n, int panel_size, int *iworkptr, int **segrep,
int **parent, int **xplore, int **repfnz, int **panel_lsub,
int **marker, int **lbusy)
{
*segrep = iworkptr; /* n */
*parent = iworkptr + n; /* n */
*xplore = iworkptr + 2*n; /* 2*n */
*repfnz = iworkptr + 4*n; /* w*n */
*panel_lsub = iworkptr + 4*n + panel_size*n; /* w*n */
*marker = iworkptr + 4*n + 2*panel_size*n; /* 3*n */
*lbusy = iworkptr + (4+NO_MARKER)*n + 2*panel_size*n; /* n */
ifill (*repfnz, n * panel_size, EMPTY);
}
void
copy_mem_int(int howmany, void *old, void *new)
{
register int i;
int *iold = old;
int *inew = new;
for (i = 0; i < howmany; i++) inew[i] = iold[i];
}
void
user_bcopy(char *src, char *dest, int bytes)
{
char *s_ptr, *d_ptr;
s_ptr = src + bytes - 1;
d_ptr = dest + bytes - 1;
for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
}
int *intMalloc(int n)
{
int *buf;
buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
if ( !buf ) {
fprintf(stderr, "SUPERLU_MALLOC failed for buf in intMalloc()\n");
exit (1);
}
return (buf);
}
int *intCalloc(int n)
{
int *buf;
register int i;
buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
if ( !buf ) {
fprintf(stderr, "SUPERLU_MALLOC failed for buf in intCalloc()\n");
exit (1);
}
for (i = 0; i < n; ++i) buf[i] = 0;
return (buf);
}
/*
* Allocate n elements storage from a global array.
* It uses lock for mutually exclusive access to the next position, so that
* more than one processors can call aalloc on the same array correctly.
* Return value: 0 - success
* >0 - number of bytes allocated when run out of space
*/
int
Glu_alloc(
const int pnum, /* process number */
const int jcol,
const int num, /* number of elements requested */
const MemType mem_type,
int *prev_next, /* return "next" value before allocation */
pxgstrf_shared_t *pxgstrf_shared
)
{
GlobalLU_t *Glu = pxgstrf_shared->Glu;
register int fsupc, nextl, nextu, new_next;
#ifdef PROFILE
double t;
#endif
switch ( mem_type ) {
case LUSUP:
/* Storage for the H-supernode is already set aside, so we do
not use lock here. */
if ( Glu->map_in_sup[jcol] < 0 )
fsupc = jcol + Glu->map_in_sup[jcol];
else fsupc = jcol;
*prev_next = Glu->map_in_sup[fsupc];
Glu->map_in_sup[fsupc] += num;
#if 0
{
register int i, j;
i = fsupc + part_super_h[fsupc];
if ( Glu->dynamic_snode_bound == YES )
new_next = Glu->nextlu;
else new_next = Glu->map_in_sup[i];
if (new_next < 0) { /* relaxed s-node */
for (i = fsupc+1; Glu->map_in_sup[i] < 0; ++i) ;
new_next = Glu->map_in_sup[i];
}
if ( Glu->map_in_sup[fsupc]>Glu->nzlumax
|| Glu->map_in_sup[fsupc]>new_next ) {
printf("(%d) jcol %d, map_[%d]=%d, map_[%d]=new_next %d\n",
pnum, jcol, fsupc, Glu->map_in_sup[fsupc],
i, new_next);
printf("(%d) snode type %d,size %d, |H-snode| %d\n",
pnum, Glu->pan_status[fsupc].type,
Glu->pan_status[fsupc].size, part_super_h[fsupc]);
for (j = fsupc; j < i; j += part_super_h[j])
printf("(%d) H snode %d, size %d\n",
pnum, j, part_super_h[j]);
ABORT("LUSUP exceeded."); /* xiaoye */
}
}
#endif
break;
case UCOL: case USUB:
#ifdef PROFILE
t = SuperLU_timer_();
#endif
#if ( MACH==SUN )
mutex_lock( &pxgstrf_shared->lu_locks[ULOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock( &pxgstrf_shared->lu_locks[ULOCK] );
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock( pxgstrf_shared->lu_locks[ULOCK] )
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard ULOCK
#endif
{
nextu = Glu->nextu;
new_next = nextu + num;
if ( new_next > Glu->nzumax ) {
XPAND_HINT("U columns", new_next, jcol, 7);
}
*prev_next = nextu;
Glu->nextu = new_next;
} /* end of critical region */
#if ( MACH==SUN )
mutex_unlock( &pxgstrf_shared->lu_locks[ULOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock( &pxgstrf_shared->lu_locks[ULOCK] );
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard ULOCK
#endif
#ifdef PROFILE
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
break;
case LSUB:
#ifdef PROFILE
t = SuperLU_timer_();
#endif
#if ( MACH==SUN )
mutex_lock( &pxgstrf_shared->lu_locks[LLOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock( &pxgstrf_shared->lu_locks[LLOCK] );
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock( pxgstrf_shared->lu_locks[LLOCK] )
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard LLOCK
#endif
{
nextl = Glu->nextl;
new_next = nextl + num;
if ( new_next > Glu->nzlmax ) {
XPAND_HINT("L subscripts", new_next, jcol, 8);
}
*prev_next = nextl;
Glu->nextl = new_next;
} /* end of #pragama critical lock() */
#if ( MACH==SUN )
mutex_unlock( &pxgstrf_shared->lu_locks[LLOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock( &pxgstrf_shared->lu_locks[LLOCK]);
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard LLOCK
#endif
#ifdef PROFILE
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
break;
}
return 0;
}
/*
* Set up memory image in lusup[*], using the supernode boundaries in
* the Householder matrix.
*
* In both static and dynamic scheme, the relaxed supernodes (leaves)
* are stored in the beginning of lusup[*]. In the static scheme, the
* memory is also set aside for the internal supernodes using upper
* bound information from H. In the dynamic scheme, however, the memory
* for the internal supernodes is not allocated by this routine.
*
* Return value
* o Static scheme: number of nonzeros of all the supernodes in H.
* o Dynamic scheme: number of nonzeros of the relaxed supernodes.
*/
int
PresetMap(
const int n,
SuperMatrix *A, /* original matrix permuted by columns */
pxgstrf_relax_t *pxgstrf_relax, /* relaxed supernodes */
pdgstrf_options_t *pdgstrf_options, /* input */
GlobalLU_t *Glu /* modified */
)
{
register int i, j, k, w, rs, rs_lastcol, krow, kmark, maxsup, nextpos;
register int rs_nrow; /* number of nonzero rows in a relaxed supernode */
int *marker, *asub, *xa_begin, *xa_end;
NCPformat *Astore;
int *map_in_sup; /* memory mapping function; values irrelevant on entry. */
int *colcnt; /* column count of Lc or H */
int *super_bnd; /* supernodes partition in H */
char *snode_env, *getenv();
snode_env = getenv("SuperLU_DYNAMIC_SNODE_STORE");
if ( snode_env != NULL ) {
Glu->dynamic_snode_bound = YES;
#if ( PRNTlevel>=1 )
printf(".. Use dynamic alg. to allocate storage for L supernodes.\n");
#endif
} else Glu->dynamic_snode_bound = NO;
Astore = A->Store;
asub = Astore->rowind;
xa_begin = Astore->colbeg;
xa_end = Astore->colend;
rs = 1;
marker = intMalloc(n);
ifill(marker, n, EMPTY);
map_in_sup = Glu->map_in_sup = intCalloc(n+1);
colcnt = pdgstrf_options->colcnt_h;
super_bnd = pdgstrf_options->part_super_h;
nextpos = 0;
/* Split large supernode into smaller pieces */
maxsup = sp_ienv(3);
for (j = 0; j < n; ) {
w = super_bnd[j];
k = j + w;
if ( w > maxsup ) {
w = w % maxsup;
if ( w == 0 ) w = maxsup;
while ( j < k ) {
super_bnd[j] = w;
j += w;
w = maxsup;
}
}
j = k;
}
for (j = 0; j < n; j += w) {
if ( Glu->dynamic_snode_bound == NO ) map_in_sup[j] = nextpos;
if ( pxgstrf_relax[rs].fcol == j ) {
/* Column j starts a relaxed supernode. */
map_in_sup[j] = nextpos;
rs_nrow = 0;
w = pxgstrf_relax[rs++].size;
rs_lastcol = j + w;
for (i = j; i < rs_lastcol; ++i) {
/* for each nonzero in A[*,i] */
for (k = xa_begin[i]; k < xa_end[i]; k++) {
krow = asub[k];
kmark = marker[krow];
if ( kmark != j ) { /* first time visit krow */
marker[krow] = j;
++rs_nrow;
}
}
}
nextpos += w * rs_nrow;
/* Find the next H-supernode, with leading column i, which is
outside the relaxed supernode, rs. */
for (i = j; i < rs_lastcol; k = i, i += super_bnd[i]);
if ( i > rs_lastcol ) {
/* The w columns [rs_lastcol, i) may join in the
preceeding relaxed supernode; make sure we leave
enough room for the combined supernode. */
w = i - rs_lastcol;
nextpos += w * MAX(rs_nrow, colcnt[k]);
}
w = i - j;
} else { /* Column j starts a supernode in H */
w = super_bnd[j];
if ( Glu->dynamic_snode_bound == NO ) nextpos += w * colcnt[j];
}
/* Set up the offset (negative) to the leading column j of a
supernode in H */
for (i = 1; i < w; ++i) map_in_sup[j + i] = -i;
} /* for j ... */
if ( Glu->dynamic_snode_bound == YES ) Glu->nextlu = nextpos;
else map_in_sup[n] = nextpos;
#if ( PRNTlevel>=1 )
printf("** PresetMap() allocates %d reals to lusup[*]....\n", nextpos);
#endif
free (marker);
return nextpos;
}
/*
* Dynamically set up storage image in lusup[*], using the supernode
* boundaries in H.
*/
int
DynamicSetMap(
const int pnum, /* process number */
const int jcol, /* leading column of the s-node in H */
const int num, /* number of elements requested */
pxgstrf_shared_t *pxgstrf_shared
)
{
GlobalLU_t *Glu = pxgstrf_shared->Glu;
register int nextlu, new_next;
int *map_in_sup = Glu->map_in_sup; /* modified; memory mapping function */
#ifdef PROFILE
double t = SuperLU_timer_();
#endif
#if ( MACH==SUN )
mutex_lock( &pxgstrf_shared->lu_locks[LULOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_lock( &pxgstrf_shared->lu_locks[LULOCK] );
#elif ( MACH==SGI || MACH==ORIGIN )
#pragma critical lock ( pxgstrf_shared->lu_locks[LULOCK] )
#elif ( MACH==CRAY_PVP )
#pragma _CRI guard LULOCK
#endif
{
nextlu = Glu->nextlu;
map_in_sup[jcol] = nextlu;
new_next = nextlu + num;
if ( new_next > Glu->nzlumax ) {
XPAND_HINT("L supernodes", new_next, jcol, 6);
}
Glu->nextlu = new_next;
} /* end of critical region */
#if ( MACH==SUN )
mutex_unlock( &pxgstrf_shared->lu_locks[LULOCK] );
#elif ( MACH==DEC || MACH==PTHREAD )
pthread_mutex_unlock( &pxgstrf_shared->lu_locks[LULOCK] );
#elif ( MACH==CRAY_PVP )
#pragma _CRI endguard LULOCK
#endif
#ifdef PROFILE
Gstat->procstat[pnum].cs_time += SuperLU_timer_() - t;
#endif
return 0;
}
syntax highlighted by Code2HTML, v. 0.9.1