/*  QRfactorMT.c  */

#include "../spoolesMT.h"
#include "../../Ideq.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
/*
   -----------------------------
   worker method for each thread
   -----------------------------
*/
static void * FrontMtx_QR_workerFactor ( void *arg ) ;
/*
   ----------------------------------------------------------------
   this object is used during a threaded factorization
 
   mtxA      -- InpMtx matrix A 
   rowsIVL   -- list[J] contains rows of A to be assembled into front J
   firstnz   -- firstnz[irow] contains first column 
                of first nonzero entry in row
   ownersIV  -- map from fronts to threads
 
   frontmtx   -- object used to store factorization
   chvmanager -- object used to manage working Chv objects
   updlist    -- object used to store update Chv objects
 
   myid      -- thread id
   facops    -- number of factor operations
   cpus      -- double vector to store breakdown of cpu times
   msglvl    -- message level
   msgFile   -- message file
 
   created -- 97may30, cca
   ----------------------------------------------------------------
*/
typedef struct _QR_factorData   QR_factorData ;
struct _QR_factorData {
/*
   -------------------------
   global data, not modified
   -------------------------
*/
   InpMtx      *mtxA     ;
   IVL         *rowsIVL  ;
   int         *firstnz  ;
   IV          *ownersIV ;
/*
   ---------------------
   shared data, modified
   ---------------------
*/
   FrontMtx    *frontmtx   ;
   ChvManager  *chvmanager ;
   ChvList     *updlist    ;
/*
   ----------
   local data
   ----------
*/
   int          myid      ;
   double       facops    ;
   double       cpus[7]   ;
   int          msglvl    ;
   FILE         *msgFile  ;
} ;
/*--------------------------------------------------------------------*/
/*
   ------------------------------------------------------------
   purpose -- compute a QR factorization using multiple threads

   created -- 98may29, cca
   ------------------------------------------------------------
*/
void
FrontMtx_MT_QR_factor (
   FrontMtx     *frontmtx,
   InpMtx       *mtxA,
   ChvManager   *chvmanager,
   IV           *ownersIV,
   double       cpus[],
   double       *pfacops,
   int          msglvl,
   FILE         *msgFile
) {
ChvList         *updlist ;
double          t0, t1 ;
IVL             *rowsIVL ;
int             ithread, myid, nthread, rc ;
int             *firstnz ;
QR_factorData   *data, *dataObjects ;

/*
   ---------------
   check the input
   ---------------
*/
if (  frontmtx == NULL || mtxA == NULL || chvmanager == NULL 
   || ownersIV == NULL || cpus == NULL || pfacops == NULL 
   || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()"
           "\n bad input\n") ;
   exit(-1) ;
}
nthread = 1 + IV_max(ownersIV) ;
/*
   ----------------------------------------------------------------
   create the update Chv list object
   create the rowsIVL object, where
      list(J) = list of rows that are assembled in front J
   firstnz[irowA] = first column with nonzero element in A(irowA,*)
   ----------------------------------------------------------------
*/
MARKTIME(t0) ;
updlist = FrontMtx_postList(frontmtx, ownersIV, LOCK_IN_PROCESS) ;
FrontMtx_QR_setup(frontmtx, mtxA, &rowsIVL, &firstnz, msglvl, msgFile) ;
MARKTIME(t1) ;
cpus[0] = t1 - t0 ;
/*
   ------------------------------------
   create and load nthread data objects
   ------------------------------------
*/
ALLOCATE(dataObjects, struct _QR_factorData, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   data->mtxA       = mtxA       ;
   data->rowsIVL    = rowsIVL    ;
   data->firstnz    = firstnz    ;
   data->ownersIV   = ownersIV   ;
   data->frontmtx   = frontmtx   ;
   data->chvmanager = chvmanager ;
   data->updlist    = updlist    ;
   data->myid       = myid       ;
   DVzero(7, data->cpus) ;
   data->facops = 0.0 ;
   data->msglvl  = msglvl ;
   if ( msglvl > 0 ) {
      char   buffer[20] ;
      sprintf(buffer, "res.%d", myid) ;
      if ( (data->msgFile = fopen(buffer, "w")) == NULL ) {
         fprintf(stderr, "\n fatal error in FrontMtx_MT_QR_factor()"
                 "\n unable to open file %s", buffer) ;
         exit(-1) ;
      }
   } else {
      data->msgFile = NULL ;
   }
}
#if THREAD_TYPE == TT_SOLARIS
/*
   ----------------------------------
   Solaris threads.
   (1) set the concurrency
   (2) create nthread - 1 new threads
   (3) execute own thread
   (4) join the threads
   ----------------------------------
*/
thr_setconcurrency(nthread) ;
for ( myid = 0, data = dataObjects ; 
      myid < nthread - 1 ; 
      myid++, data++ ) {
   rc = thr_create(NULL, 0, FrontMtx_QR_workerFactor, data, 0, NULL) ;
   if ( rc != 0 ) {
      fprintf(stderr, 
              "\n fatal error, myid = %d, rc = %d from thr_create()", 
              myid, rc) ;
      exit(-1) ;
   }
}
FrontMtx_QR_workerFactor(data) ;
for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
   thr_join(0, 0, 0) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
/*
   ----------------------------------
   POSIX threads.
   (1) if SGI, set the concurrency
   (2) create nthread new threads
   (3) join the threads
   ----------------------------------
*/
{
pthread_t        *tids ;
pthread_attr_t   attr  ;
void             *status ;
/*
   ---------------------------------------------------------
   #### NOTE: for SGI machines, this command must be present
   ####       for the thread scheduling to be efficient.
   ####       this is NOT a POSIX call, but necessary
   ---------------------------------------------------------
   pthread_setconcurrency(nthread) ;
*/
   pthread_attr_init(&attr) ;
/*
   pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
*/
   pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ;
   ALLOCATE(tids, pthread_t, nthread) ;
   for ( myid = 0 ; myid < nthread ; myid++ ) {
      tids[myid] = 0 ;
   }
   for ( myid = 0, data = dataObjects ; 
         myid < nthread ; 
         myid++, data++ ) {
      rc = pthread_create(&tids[myid], &attr,
                          FrontMtx_QR_workerFactor, data) ;
      if ( rc != 0 ) {
         fprintf(stderr,
                 "\n fatal error in FrontMtx_MT_QR_factor()"
                 "\n myid = %d, rc = %d from pthread_create()",
                 myid, rc) ;
         exit(-1) ;
      } else if ( msglvl > 2 ) {
         fprintf(stderr, "\n thread %d created", tids[myid]) ;
      }
   }
   for ( myid = 0 ; myid < nthread ; myid++ ) {
      pthread_join(tids[myid], &status) ;
   }
   FREE(tids) ;
   pthread_attr_destroy(&attr) ;
}
#endif
/*
   ----------------------------------------------
   fill the cpu vector and factor operation count
   ----------------------------------------------
*/
*pfacops = 0 ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n thread %d cpus", myid) ;
      DVfprintf(msgFile, 7, data->cpus) ;
   }
   for ( ithread = 0 ; ithread < 7 ; ithread++ ) {
      cpus[ithread] += data->cpus[ithread] ;
   }
   *pfacops += data->facops ;
}
/*
   -------------
   free the data
   -------------
*/
ChvList_free(updlist) ;
IVL_free(rowsIVL) ;
IVfree(firstnz) ;
FREE(dataObjects) ;

return ; }

/*--------------------------------------------------------------------*/
/*
   ----------------------------------------------------
   purpose -- worker method to factor the matrix
 
 
   created -- 98may29, cca
   ----------------------------------------------------
*/
static void *
FrontMtx_QR_workerFactor (
   void   *arg
) {
char            *status ;
ChvList         *updlist ;
ChvManager      *chvmanager ;
double          facops, t0, t1 ;
double          *cpus ;
DV              workDV ;
FILE            *msgFile ;
FrontMtx        *frontmtx ;
Ideq            *dequeue ;
InpMtx          *mtxA ;
int             J, K, myid, neqns, nfront, msglvl ;
int             *colmap, *firstnz, *nactiveChild, *owners, *par ;
IVL             *rowsIVL ;
QR_factorData   *data ;

MARKTIME(t0) ;
data = (QR_factorData *) arg ;
mtxA       = data->mtxA     ;
rowsIVL    = data->rowsIVL  ;
firstnz    = data->firstnz  ;
IV_sizeAndEntries(data->ownersIV, &nfront, &owners) ;
frontmtx   = data->frontmtx   ;
chvmanager = data->chvmanager ;
updlist    = data->updlist    ;
myid       = data->myid       ;
cpus       = data->cpus       ;
msglvl     = data->msglvl     ;
msgFile    = data->msgFile    ;
par        = frontmtx->tree->par ;
neqns      = FrontMtx_neqns(frontmtx) ;
/*
   --------------------------------------------------------
   status[J] = 'F' --> J finished
             = 'W' --> J waiting to be finished
   create the Ideq object to handle the bottom-up traversal
   nactiveChild[K] = # of unfinished children of K,
      when zero, K can be placed on the dequeue
   --------------------------------------------------------
*/
status = CVinit(nfront, 'F') ;
dequeue = FrontMtx_setUpDequeue(frontmtx, owners, myid, status,
                                NULL, 'W', 'F', msglvl, msgFile) ;
FrontMtx_loadActiveLeaves(frontmtx, status, 'W', dequeue) ;
nactiveChild = FrontMtx_nactiveChild(frontmtx, status, myid) ;
colmap = IVinit(neqns, -1) ;
DV_setDefaultFields(&workDV) ;
facops = 0.0 ;
if ( msglvl > 3 ) {
   fprintf(msgFile, "\n owners") ;
   IVfprintf(msgFile, nfront, owners) ;
   fprintf(msgFile, "\n Ideq") ;
   Ideq_writeForHumanEye(dequeue, msgFile) ;
   fflush(msgFile) ;
}
MARKTIME(t1) ;
cpus[0] += t1 - t0 ;
/*
   ---------------------------
   loop while a path is active
   ---------------------------
*/
while ( (J = Ideq_removeFromHead(dequeue)) != -1 ) {
   if ( msglvl > 1 ) {
      fprintf(msgFile, "\n\n ### checking out front %d, owner %d",
              J, owners[J]) ;
   }
   if ( owners[J] == myid ) {
/*
      --------------------------------
      front J is ready to be processed
      --------------------------------
*/
      FrontMtx_QR_factorVisit(frontmtx, J, mtxA, rowsIVL, firstnz,
                              updlist, chvmanager, status, colmap,
                              &workDV, cpus, &facops, msglvl, msgFile) ;
      if ( status[J] == 'F' ) {
/*
         ------------------------------------------
         front J is finished, put parent on dequeue 
         if it exists or all children are finished
         ------------------------------------------
*/
         if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
            Ideq_insertAtHead(dequeue, K) ;
         }
      } else {
/*
         -----------------------------------------------
         front J is not complete, put on tail of dequeue
         -----------------------------------------------
*/
         Ideq_insertAtTail(dequeue, J) ;
      }
   } else {
/*
      -------------------------------------------
      front J is not owned, put parent on dequeue 
      if it exists and all children are finished
      -------------------------------------------
*/
      if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
         Ideq_insertAtHead(dequeue, K) ;
      }
   }
}
data->facops = facops ;
/*
   ------------------------
   free the working storage
   ------------------------
*/
CVfree(status) ;
Ideq_free(dequeue) ;
IVfree(nactiveChild) ;
IVfree(colmap) ;
DV_clearData(&workDV) ;
MARKTIME(t1) ;
cpus[6] = t1 - t0 ;
cpus[5] = t1 - t0 - cpus[0] - cpus[1]
         - cpus[2] - cpus[3] - cpus[4] ;

return(NULL) ; }

/*--------------------------------------------------------------------*/


syntax highlighted by Code2HTML, v. 0.9.1