/*  factorMT.c  */

#include "../spoolesMT.h"
#include "../../Ideq.h"
#include "../../timings.h"

/*--------------------------------------------------------------------*/
/*
   -----------------------------
   worker method for each thread
   -----------------------------
*/
static void * FrontMtx_workerFactor ( void *arg ) ;
/*
   ----------------------------------------------------------------
  this object is used during a threaded factorization
 
   pencil    -- matrix pencil A + sigma * B
   tau       -- upper bound on factor entries when pivoting enabled
   droptol   -- drop tolerance used for sparse fronts
   ownersIV  -- map from fronts to threads
   lookahead -- parameter used to control computation lookahead
      0   --> no lookahead
      > 0 --> look up a number of levels up the tree
 
   frontmtx  -- object used to store factorization
   manager   -- object used to manage working Chv objects
   aggList   -- object used to store aggregate data
   postList  -- object used to store postponed data
   perror    -- pointer to external error flag
 
   heads   -- IP vector to maintain update lists
   myid    -- thread id
   fronts  -- vector of pointers to active fronts
   stats   -- int statistics vector
   cpus    -- double vector to store breakdown of cpu times
   msglvl  -- message level
   msgFile -- message file
 
   created -- 97may30, cca
   ----------------------------------------------------------------
*/
typedef struct _FactorData   FactorData ;
struct _FactorData {
/*
   -------------------------
   global data, not modified
   -------------------------
*/
   Pencil      *pencil   ;
   double      tau       ;
   double      droptol   ;
   IV          *ownersIV ;
   int         lookahead ;
/*
   ---------------------
   shared data, modified
   ---------------------
*/
   FrontMtx    *frontmtx   ;
   ChvManager  *chvmanager ;
   ChvList     *aggList    ;
   ChvList     *postList   ;
   int         *perror     ;
/*
   ----------
   local data
   ----------
*/
   int          myid      ;
   int          stats[10] ;
   double       cpus[20]  ;
   int          msglvl    ;
   FILE         *msgFile  ;
} ;
/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   parallel factorization method for A.
   all but two input parameters are the same as the serial method.
   this is a wrapper method around FrontMtx_MT_factorInpMtx().

   ownersIV  -- pointer to IV object that holds map from fronts 
                to threads
   lookahead -- lookahead parameter that allows computation at
                higher levels of the front tree to proceed when
                lower fronts are not yet finish. use lookahead = 0
                to turn off this option. otherwise lookahead ancestors
                of an active unfinished front can be active.

   return value -- pointer to the first Chv object in a list
                   that contains postponed data
 
   created -- 98may16, cca
   -------------------------------------------------------------------
*/
Chv *
FrontMtx_MT_factorInpMtx (
   FrontMtx     *frontmtx,
   InpMtx       *inpmtx,
   double       tau,
   double       droptol,
   ChvManager   *chvmanager,
   IV           *ownersIV,
   int          lookahead,
   int          *perror,
   double       cpus[],
   int          stats[],
   int          msglvl,
   FILE         *msgFile
) {
Chv      *rootchv ;
double   sigma[2] = {0.0, 0.0} ;
Pencil   pencil ;

Pencil_setDefaultFields(&pencil) ;
Pencil_init(&pencil, frontmtx->type, frontmtx->symmetryflag,
            inpmtx, sigma, NULL) ;
rootchv = FrontMtx_MT_factorPencil(frontmtx, &pencil, tau, droptol,
                                 chvmanager, ownersIV, lookahead, 
                                 perror, cpus, stats, msglvl, msgFile) ;

return(rootchv) ; }

/*--------------------------------------------------------------------*/
/*
   -------------------------------------------------------------------
   parallel factorization method for A + sigma*B.
   all but two input parameters are the same 
   as the FrontMtx_factorPencil method.

   ownersIV  -- pointer to IV object that holds map from fronts 
                to threads
   lookahead -- lookahead parameter that allows computation at
                higher levels of the front tree to proceed when
                lower fronts are not yet finish. use lookahead = 0
                to turn off this option. otherwise lookahead ancestors
                of an active unfinished front can be active.

   return value -- pointer to the first Chv object in a list
                   that contains postponed data
 
   created -- 98may16, cca
   -------------------------------------------------------------------
*/
Chv *
FrontMtx_MT_factorPencil (
   FrontMtx     *frontmtx,
   Pencil       *pencil,
   double       tau,
   double       droptol,
   ChvManager   *chvmanager,
   IV           *ownersIV,
   int          lookahead,
   int          *perror,
   double       cpus[],
   int          stats[],
   int          msglvl,
   FILE         *msgFile
) {
char         buffer[20] ;
Chv          *rootchv ;
ChvList      *aggList ;
ChvList      *postList ;
double       t0, t1, t2 ;
FactorData   *data, *dataObjects ;
FILE         *fp ;
int          ierr, ii, myid, nfront, nthread, rc ;
int          *owners ;
/*
   --------------
   check the data
   --------------
*/
MARKTIME(t0) ;
if (  frontmtx == NULL || pencil == NULL || tau < 1.0 || droptol < 0.0
   || ownersIV == NULL || lookahead < 0 || cpus == NULL || stats == NULL
   || msglvl < 0 || (msglvl > 0 && msgFile == NULL) ) {
   fprintf(stderr, "\n fatal error in FrontMtx_MT_factorPencil()"
           "\n frontmtx = %p, pencil = %p"
           "\n tau = %f, droptol = %f, ownersIV = %p, lookahead = %d"
           "\n cpus = %p, stats = %p, msglvl = %d, msgFile = %p"
           "\n bad input\n", frontmtx, pencil, tau, droptol, 
           ownersIV, lookahead, cpus, stats, msglvl, msgFile) ;
   exit(-1) ;
}
IV_sizeAndEntries(ownersIV, &nfront, &owners) ;
nthread = 1 + IV_max(ownersIV) ;
/*
   --------------------------------------------------------------------
   create :
   (1) a ChvList object to handle the lists of aggregate Chv objects,
   (2) if pivoting is enabled, a ChvList object to handle 
       the lists of postponed Chv objects.
   --------------------------------------------------------------------
*/
MARKTIME(t1) ;
aggList = FrontMtx_aggregateList(frontmtx, ownersIV, 1) ;
if ( FRONTMTX_IS_PIVOTING(frontmtx) ) {
   postList = FrontMtx_postList(frontmtx, ownersIV, 1) ;
} else {
   postList = NULL ;
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : initialize lists and manager", 
           t2 - t1) ;
}
/*
   ------------------
   set the error flag
   ------------------
*/
*perror = -1 ;
/*
   ----------------------------------------------------------
   create nthread FactorData objects and load with their data
   ----------------------------------------------------------
*/
MARKTIME(t1) ;
ALLOCATE(dataObjects, struct _FactorData, nthread) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
   data->pencil     = pencil     ;
   data->tau        = tau        ;
   data->droptol    = droptol    ;
   data->ownersIV   = ownersIV   ;
   data->lookahead  = lookahead  ;
   data->frontmtx   = frontmtx   ;
   data->chvmanager = chvmanager ;
   data->aggList    = aggList    ;
   data->postList   = postList   ;
   data->perror     = perror     ;
   data->myid       = myid       ;
   IVzero(10, data->stats) ;
   DVzero(20, data->cpus) ;
   data->msglvl = msglvl ;
   if ( msglvl > 0 ) {
      sprintf(buffer, "res.%d", myid) ;
      if ( (fp = fopen(buffer, "w")) == NULL ) {
         fprintf(stderr, "\n fatal error, unable to open file %s",
                 buffer) ;
         exit(-1) ;
      }
      data->msgFile = fp ;
   } else {
      data->msgFile = NULL ;
   }
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : initialize data objects", t2 - t1) ;
}
/*
   ---------------------------
   switch over the thread type
   ---------------------------
*/
#if THREAD_TYPE == TT_SOLARIS
/*
   ---------------
   solaris threads
   ---------------
*/
MARKTIME(t1) ;
thr_setconcurrency(nthread) ;
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : set concurrency time", t2 - t1) ;
}
MARKTIME(t1) ;
for ( myid = 0, data = dataObjects ; 
      myid < nthread - 1 ;
      myid++, data++ ) {
   rc = thr_create(NULL, 0, FrontMtx_workerFactor, data, 0, NULL) ;
   if ( rc != 0 ) {
      fprintf(stderr, 
              "\n fatal error, myid = %d, rc = %d from thr_create",
              myid, rc) ;
      exit(-1) ;
   }
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
}
MARKTIME(t1) ;
FrontMtx_workerFactor(data) ;
for ( myid = 0 ; myid < nthread - 1 ; myid++ ) {
   thr_join(0, 0, 0) ;
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
/*
   -------------
   POSIX threads
   -------------
*/
{
pthread_t        *tids ;
pthread_attr_t   attr  ;
void             *status ;
/*
#####   NOTE: for SGI machines, this command must be present
#####         for the thread scheduling to be efficient.
#####         this is NOT a POSIX call, but SGI needs it anyway
pthread_setconcurrency(nthread) ;
*/
pthread_attr_init(&attr) ;
/*
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM) ;
*/
pthread_attr_setscope(&attr, PTHREAD_SCOPE_PROCESS) ;
ALLOCATE(tids, pthread_t, nthread) ;
MARKTIME(t1) ;
for ( myid = 0, data = dataObjects ; myid < nthread ; myid++, data++ ) {
/*
   rc = pthread_create(&tids[myid], &attr, 
                       FrontMtx_workerFactor, data) ;
*/
   rc = pthread_create(&tids[myid], NULL, FrontMtx_workerFactor, data) ;
   if ( rc != 0 ) {
      fprintf(stderr, 
              "\n fatal error, myid = %d, rc = %d from pthread_create",
              myid, rc) ;
      exit(-1) ;
   } else if ( msglvl > 1 ) {
      fprintf(stderr, "\n thread %d created", tids[myid]) ;
   }
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread creation time", t2 - t1) ;
}
MARKTIME(t1) ;
for ( myid = 0 ; myid < nthread ; myid++ ) {
   pthread_join(tids[myid], &status) ;
}
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : thread join time", t2 - t1) ;
}
FREE(tids) ;
pthread_attr_destroy(&attr) ;
}
#endif
/*
   --------------------------------------------
   get a pointer to any postponed root chevrons
   --------------------------------------------
*/
if ( postList != NULL ) {
   rootchv = ChvList_getList(postList, nfront) ;
} else {
   rootchv = NULL ;
}
/*
   -------------------
   fill the statistics
   -------------------
*/
for ( myid = 0, data = dataObjects ; 
      myid < nthread ;
      myid++, data++ ) {
   if ( msglvl > 3 ) {
      fprintf(msgFile, "\n thread %d stats", myid) ;
      IVfp80(msgFile, 10, data->stats, 20, &ierr) ;
      fprintf(msgFile, "\n thread %d cpus", myid) ;
      DVfprintf(msgFile, 10, data->cpus) ;
   }
   for ( ii = 0 ; ii < 10 ; ii++ ) {
      stats[ii] += data->stats[ii] ;
   }
   for ( ii = 0 ; ii <= 10 ; ii++ ) {
      cpus[ii] += data->cpus[ii] ;
   }
}
stats[3] = frontmtx->nentD ;
stats[4] = frontmtx->nentL ;
stats[5] = frontmtx->nentU ;
stats[6] = frontmtx->nlocks ;
stats[7] = aggList->nlocks ;
if ( postList != NULL ) {
   stats[8] = postList->nlocks ;
}
if ( msglvl > 0 ) {
   fprintf(msgFile, 
           "\n\n factorization has finished" 
           "\n %d locks of the front matrix"
           "\n %d locks of the aggregate list",
           frontmtx->nlocks, aggList->nlocks) ;
   if ( postList != NULL ) {
      fprintf(msgFile, "\n %d locks of the aggregate list",
          aggList->nlocks) ;
   }
}
/*
   -------------
   free the data
   -------------
*/
MARKTIME(t1) ;
ChvList_free(aggList) ;
if ( postList != NULL ) {
   ChvList_free(postList) ;
}
FREE(dataObjects) ;
MARKTIME(t2) ;
if ( msglvl > 1 ) {
   fprintf(msgFile, "\n CPU %8.3f : total time", t2 - t1) ;
}

return(rootchv) ; }

/*--------------------------------------------------------------------*/
/*
   ---------------------------------------------
   purpose -- worker method to factor the matrix


   created -- 97may30, cca
   ---------------------------------------------
*/
static void *
FrontMtx_workerFactor (
   void   *arg
) {
char          *status ;
Chv           **fronts ;
ChvList       *aggList, *postList ;
ChvManager    *chvmanager ;
FactorData    *data ;
FrontMtx      *frontmtx ;
Pencil        *pencil ;
double        droptol, tau, t1, t2 ;
double        *cpus ;
DV            *workDV ;
ETree         *frontETree ;
FILE          *msgFile ;
Ideq          *deq ;
int           J, K, lookahead, msglvl, myid, nfront ;
int           *nactiveChild, *owners, *par, *stats ;
IP            **heads ;
IV            *ownersIV, pivotsizesIV ;
Tree          *tree ;
#if THREAD_TYPE == TT_POSIX
/*
fprintf(stdout, "\n ### inside workerFactor, pthread_self() = %d", 
        pthread_self()) ;
fflush(stdout) ;
*/
#endif
/*
   -------------------------------
   extract pointers and dimensions
   -------------------------------
*/
MARKTIME(t1) ;
data = (FactorData *) arg ;
pencil     = data->pencil   ;
tau        = data->tau      ;
droptol    = data->droptol  ;
msglvl     = data->msglvl   ;
msgFile    = data->msgFile  ;
myid       = data->myid     ;
frontmtx   = data->frontmtx ;
chvmanager = data->chvmanager ;
aggList    = data->aggList    ;
postList   = data->postList   ;
frontETree = frontmtx->frontETree ;
tree       = ETree_tree(frontETree) ;
nfront     = ETree_nfront(frontETree) ;
par        = ETree_par(frontETree) ;
lookahead  = data->lookahead ;
ownersIV   = data->ownersIV ;
owners     = IV_entries(ownersIV) ;
stats      = data->stats;
cpus       = data->cpus ;
#if THREAD_TYPE == TT_SOLARIS
if ( msglvl > 2 ) {
   fprintf(stdout, 
           "\n ### inside workerFactor, myid = %d, thr_self() = %d", 
           myid, thr_self()) ;
   fflush(stdout) ;
}
#endif
#if THREAD_TYPE == TT_POSIX
if ( msglvl > 2 ) {
   fprintf(stdout, "\n ### inside workerFactor, myid = %d" 
                   ", pthread_self() = %d", myid, pthread_self()) ;
   fflush(stdout) ;
}
#endif
/*
   ---------------------------------------------------
   initialize the heads[] vector for the owned updates
   ---------------------------------------------------
*/
heads = FrontMtx_factorSetup(frontmtx, ownersIV, myid,
                              msglvl, msgFile) ;
/*
   ----------------------------------------------------------------
   initialize the Ideq object that holds the initial fronts
   of the active paths, owned fronts with no children that
   are owned or updates by this thread. 
   status[J] == 'W' --> J belongs to an active path for this thread
   ----------------------------------------------------------------
*/
status = CVinit(nfront, 'F') ;
deq = FrontMtx_setUpDequeue(frontmtx, IV_entries(ownersIV), myid, 
                             status, heads, 'W', 'F', msglvl, msgFile) ;
FrontMtx_loadActiveLeaves(frontmtx, status, 'W', deq) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n status") ;
   CVfprintf(msgFile, nfront, status) ;
   fflush(msgFile) ;
}
/*
   -----------------------------------------------
   initialize the nactiveChild[] vector,
   nactiveChild[J] measures the number of children 
   that belong to active paths of this thread
   -----------------------------------------------
*/
nactiveChild = FrontMtx_nactiveChild(frontmtx, status, myid) ;
if ( msglvl > 2 ) {
   fprintf(msgFile, "\n\n nactiveChild") ;
   IVfprintf(msgFile, nfront, nactiveChild) ;
   fflush(msgFile) ;
}
/*
   ------------------------------
   initialize the working storage
   ------------------------------
*/
ALLOCATE(fronts, Chv *, nfront) ;
for ( J = 0 ; J < nfront ; J++ ) {
   fronts[J] = NULL ;
}
IV_setDefaultFields(&pivotsizesIV) ;
/*
if ( FRONTMTX_IS_PIVOTING(frontmtx) 
   && (  FRONTMTX_IS_SYMMETRIC(frontmtx) 
      || FRONTMTX_IS_HERMITIAN(frontmtx) ) ) {
   pivotsizesIV = IV_new() ;
} else {
   fprintf(msgFile, "\n no pivotsizesIV needed") ;
   fflush(msgFile) ;
   pivotsizesIV = NULL ;
}
*/
workDV = DV_new() ;
/*
   ---------------------------
   loop while a path is active
   ---------------------------
*/
IVzero(10, stats) ;
while ( (J = Ideq_removeFromHead(deq)) != -1 ) {
   if ( msglvl > 1 ) {
      fprintf(msgFile, "\n\n ### checking out front %d", J) ;
      fflush(msgFile) ;
   }
   FrontMtx_factorVisit(frontmtx, pencil, J, myid, owners, fronts,
      lookahead, data->tau, data->droptol, status, heads, &pivotsizesIV,
      workDV, par, data->aggList, data->postList, 
      data->chvmanager, stats, cpus, msglvl, msgFile) ;
   if ( status[J] == 'E' ) {
/*
      ----------------------------
      error encountered at front J
      ----------------------------
*/
      *(data->perror) = J ;
      break ;
   } else if ( status[J] == 'F' ) {
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n\n front %d is finished", J) ;
         fflush(msgFile) ;
      }
      if ( (K = par[J]) != -1 && --nactiveChild[K] == 0 ) {
         if ( msglvl > 1 ) {
            fprintf(msgFile, "\n\n adding front %d to dequeue", K) ;
            fflush(msgFile) ;
         }
         Ideq_insertAtHead(deq, K) ;
      }
   } else {
      if ( msglvl > 1 ) {
         fprintf(msgFile, "\n\n front %d not yet done", J) ;
         fflush(msgFile) ;
      }
      Ideq_insertAtTail(deq, J) ;
   }
   if ( *(data->perror) >= 0 ) {
/*
      ------------------------------------
      error encountered, break out of loop
      ------------------------------------
*/
      break ;
   }
}
/*
   ------------------------
   free the working storage
   ------------------------
*/
IV_clearData(&pivotsizesIV) ;
if ( workDV != NULL ) {
   DV_free(workDV) ;
}
CVfree(status) ;
IVfree(nactiveChild) ;
IP_free(heads[nfront+1]) ;
FREE(heads) ;
FREE(fronts) ;
Ideq_free(deq) ;
MARKTIME(t2) ;
cpus[10] = t2 - t1 ;
cpus[9] = t2 - t1 - cpus[0] - cpus[1] - cpus[2] - cpus[3] 
        - cpus[4] - cpus[5] - cpus[6] - cpus[7] - cpus[8] ;

return(NULL) ; }

/*--------------------------------------------------------------------*/


syntax highlighted by Code2HTML, v. 0.9.1