/* QRfactorMT.c */
#include "../spoolesMT.h"
#include "../../Ideq.h"
#include "../../timings.h"
/*--------------------------------------------------------------------*/
/*
-----------------------------
worker method for each thread
-----------------------------
*/
static void * FrontMtx_QR_workerFactor ( void *arg ) ;
/*
----------------------------------------------------------------
this object is used during a threaded factorization
mtxA -- InpMtx matrix A
rowsIVL -- list[J] contains rows of A to be assembled into front J
firstnz -- firstnz[irow] contains first column
of first nonzero entry in row
ownersIV -- map from fronts to threads
frontmtx -- object used to store factorization
chvmanager -- object used to manage working Chv objects
updlist -- object used to store update Chv objects
myid -- thread id
facops -- number of factor operations
cpus -- double vector to store breakdown of cpu times
msglvl -- message level
msgFile -- message file
created -- 97may30, cca
----------------------------------------------------------------
*/
typedef struct _QR_factorData QR_factorData ;
struct _QR_factorData {
/*
-------------------------
global data, not modified
-------------------------
*/
InpMtx *mtxA ;
IVL *rowsIVL ;
int *firstnz ;
IV *ownersIV ;
/*
---------------------
shared data, modified
---------------------
*/
FrontMtx *frontmtx ;
ChvManager *chvmanager ;
ChvList *updlist ;
/*
----------
local data
----------
*/
int myid ;
double facops ;
double cpus[7] ;
int msglvl ;
FILE *msgFile ;
} ;
/*--------------------------------------------------------------------*/
/*
------------------------------------------------------------
purpose -- compute a QR factorization using multiple threads
created -- 98may29, cca
------------------------------------------------------------
*/
void
FrontMtx_MT_QR_factor (
FrontMtx *frontmtx,
InpMtx *mtxA,
ChvManager *chvmanager,
IV *ownersIV,
double cpus[],
double *pfacops,
int msglvl,
FILE *msgFile
) {
ChvList *updlist ;
double t0, t1 ;
IVL *rowsIVL ;
int ithread, myid, nthread, rc ;
int *firstnz ;
QR_factorData *data, *dataObjects ;
/*
---------------
check the input
---------------
*/
if ( frontmtx == NULL || mtxA == NULL || chvmanager == NULL
|| ownersIV == NULL || cpus == NULL || pfacops == NULL
|| (msglvl > 0 && msgFile == NULL) ) {
fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()"
"\n bad input\n") ;
exit(-1) ;
}
nthread = 1 + IV_max(ownersIV) ;
/*
----------------------------------------------------------------
create the update Chv list object
create the rowsIVL object, where
list(J) = list of rows that are assembled in front J
firstnz[irowA] = first column with nonzero element in A(irowA,*)
----------------------------------------------------------------
*/
MARKTIME(t0) ;
updlist = FrontMtx_postList(frontmtx, ownersIV, LOCK_IN_PROCESS) ;
FrontMtx_QR_setup(frontmtx, mtxA, &rowsIVL, &firstnz, msglvl, msgFile) ;
MARKTIME(t1) ;
cpus[0] = t1 - t0 ;
/*
------------------------------------
create and load nthread data objects
------------------------------------
*/
ALLOCATE(dataObjects, struct _QR_factorData, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
data->mtxA = mtxA ;
data->rowsIVL = rowsIVL ;
data->firstnz = firstnz ;
data->ownersIV = ownersIV ;
data->frontmtx = frontmtx ;
data->chvmanager = chvmanager ;
data->updlist = updlist ;
data->myid = myid ;
DVzero(7, data->cpus) ;
data->facops = 0.0 ;
data->msglvl = msglvl ;
if ( msglvl > 0 ) {
char buffer[20] ;
sprintf(buffer, "res.%d", myid) ;
if ( (data->msgFile = fopen(buffer, "w")) == NULL ) {
fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()"
"\n unable to open file %s", buffer) ;
exit(-1) ;
}
} else {
data->msgFile = NULL ;
}
}
#if THREAD_TYPE == TT_SOLARIS
/*
----------------------------------
Solaris threads.
(1) set the concurrency
(2) create nthread - 1 new threads
(3) execute own thread
(4) join the threads
----------------------------------
*/
thr_setconcurrency(nthread) ;
for ( myid = 0, data = dataObjects ;
myid < nthread - 1 ;
myid++, data++ ) {
rc = thr_create(NULL, 0, FrontMtx_QR_workerFactor, data, 0, NULL) ;
if ( rc != 0 ) {
fprintf(stderr,
"\n fatal error, myid = %d, rc = %d from thr_create()",
myid, rc) ;
exit(-1) ;
}
}
FrontMtx_QR_workerFactor(data) ;
for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
thr_join(0, 0, 0) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
/*
----------------------------------
POSIX threads.
(1) if SGI, set the concurrency
(2) create nthread new threads
(3) join the threads
----------------------------------
*/
{
pthread_t *tids ;
pthread_attr_t attr ;
void *status ;
/*
---------------------------------------------------------
#### NOTE: for SGI machines, this command must be present
#### for the thread scheduling to be efficient.
#### this is NOT a POSIX call, but necessary
---------------------------------------------------------
pthread_setconcurrency(nthread) ;
*/
pthread_attr_init(&attr) ;
/*
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
*/
pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ;
ALLOCATE(tids, pthread_t, nthread) ;
for ( myid = 0 ; myid < nthread ; myid++ ) {
tids[myid] = 0 ;
}
for ( myid = 0, data = dataObjects ;
myid < nthread ;
myid++, data++ ) {
rc = pthread_create(&tids[myid], &attr,
FrontMtx_QR_workerFactor, data) ;
if ( rc != 0 ) {
fprintf(stderr,
"\n fatal error in FrontMtx_MT_QR_factor()"
"\n myid = %d, rc = %d from pthread_create()",
myid, rc) ;
exit(-1) ;
} else if ( msglvl > 2 ) {
fprintf(stderr, "\n thread %d created", tids[myid]) ;
}
}
for ( myid = 0 ; myid < nthread ; myid++ ) {
pthread_join(tids[myid], &status) ;
}
FREE(tids) ;
pthread_attr_destroy(&attr) ;
}
#endif
/*
----------------------------------------------
fill the cpu vector and factor operation count
----------------------------------------------
*/
*pfacops = 0 ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
if ( msglvl > 3 ) {
fprintf(msgFile, "\n thread %d cpus", myid) ;
DVfprintf(msgFile, 7, data->cpus) ;
}
for ( ithread = 0 ; ithread < 7 ; ithread++ ) {
cpus[ithread] += data->cpus[ithread] ;
}
*pfacops += data->facops ;
}
/*
-------------
free the data
-------------
*/
ChvList_free(updlist) ;
IVL_free(rowsIVL) ;
IVfree(firstnz) ;
FREE(dataObjects) ;
return ; }
/*--------------------------------------------------------------------*/
/*
----------------------------------------------------
purpose -- worker method to factor the matrix
created -- 98may29, cca
----------------------------------------------------
*/
static void *
FrontMtx_QR_workerFactor (
void *arg
) {
char *status ;
ChvList *updlist ;
ChvManager *chvmanager ;
double facops, t0, t1 ;
double *cpus ;
DV workDV ;
FILE *msgFile ;
FrontMtx *frontmtx ;
Ideq *dequeue ;
InpMtx *mtxA ;
int J, K, myid, neqns, nfront, msglvl ;
int *colmap, *firstnz, *nactiveChild, *owners, *par ;
IVL *rowsIVL ;
QR_factorData *data ;
MARKTIME(t0) ;
data = (QR_factorData *) arg ;
mtxA = data->mtxA ;
rowsIVL = data->rowsIVL ;
firstnz = data->firstnz ;
IV_sizeAndEntries(data->ownersIV, &nfront, &owners) ;
frontmtx = data->frontmtx ;
chvmanager = data->chvmanager ;
updlist = data->updlist ;
myid = data->myid ;
cpus = data->cpus ;
msglvl = data->msglvl ;
msgFile = data->msgFile ;
par = frontmtx->tree->par ;
neqns = FrontMtx_neqns(frontmtx) ;
/*
--------------------------------------------------------
status[J] = 'F' --> J finished
= 'W' --> J waiting to be finished
create the Ideq object to handle the bottom-up traversal
nactiveChild[K] = # of unfinished children of K,
when zero, K can be placed on the dequeue
--------------------------------------------------------
*/
status = CVinit(nfront, 'F') ;
dequeue = FrontMtx_setUpDequeue(frontmtx, owners, myid, status,
NULL, 'W', 'F', msglvl, msgFile) ;
FrontMtx_loadActiveLeaves(frontmtx, status, 'W', dequeue) ;
nactiveChild = FrontMtx_nactiveChild(frontmtx, status, myid) ;
colmap = IVinit(neqns, -1) ;
DV_setDefaultFields(&workDV) ;
facops = 0.0 ;
if ( msglvl > 3 ) {
fprintf(msgFile, "\n owners") ;
IVfprintf(msgFile, nfront, owners) ;
fprintf(msgFile, "\n Ideq") ;
Ideq_writeForHumanEye(dequeue, msgFile) ;
fflush(msgFile) ;
}
MARKTIME(t1) ;
cpus[0] += t1 - t0 ;
/*
---------------------------
loop while a path is active
---------------------------
*/
while ( (J = Ideq_removeFromHead(dequeue)) != -1 ) {
if ( msglvl > 1 ) {
fprintf(msgFile, "\n\n ### checking out front %d, owner %d",
J, owners[J]) ;
}
if ( owners[J] == myid ) {
/*
--------------------------------
front J is ready to be processed
--------------------------------
*/
FrontMtx_QR_factorVisit(frontmtx, J, mtxA, rowsIVL, firstnz,
updlist, chvmanager, status, colmap,
&workDV, cpus, &facops, msglvl, msgFile) ;
if ( status[J] == 'F' ) {
/*
------------------------------------------
front J is finished, put parent on dequeue
if it exists or all children are finished
------------------------------------------
*/
if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
Ideq_insertAtHead(dequeue, K) ;
}
} else {
/*
-----------------------------------------------
front J is not complete, put on tail of dequeue
-----------------------------------------------
*/
Ideq_insertAtTail(dequeue, J) ;
}
} else {
/*
-------------------------------------------
front J is not owned, put parent on dequeue
if it exists and all children are finished
-------------------------------------------
*/
if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
Ideq_insertAtHead(dequeue, K) ;
}
}
}
data->facops = facops ;
/*
------------------------
free the working storage
------------------------
*/
CVfree(status) ;
Ideq_free(dequeue) ;
IVfree(nactiveChild) ;
IVfree(colmap) ;
DV_clearData(&workDV) ;
MARKTIME(t1) ;
cpus[6] = t1 - t0 ;
cpus[5] = t1 - t0 - cpus[0] - cpus[1]
- cpus[2] - cpus[3] - cpus[4] ;
return(NULL) ; }
/*--------------------------------------------------------------------*/
syntax highlighted by Code2HTML, v. 0.9.1