/*! \file main.c */ /*! \mainpage M I G R A T E * \section intro Introduction * Migrate is a program to estimate population-genetic parameters from genetic data such as * electrophoretic marker data, microsatellite data, DNA or RNA sequence data, and single * nucleotide polymorphisms. Migrate uses the concepts of Maximum likelihood and Bayesian * inference to infer parameters based on coalescence theory. For more information related * to the use of the program or the theory behind check out * http://popgen.scs.fsu.edu/migrate.html * * \section install Installation * to install the program simply do * \subsection step1 configure * \subsection step2 make * \subsection step3 sudo make install * * \section maintainer Maintainer and Copyrights * Peter Beerli, * beerli@scs.fsu.edu * * Copyright 1997-2002 Peter Beerli and Joseph Felsenstein, Seattle WA\n * Copyright 2003-2007 Peter Beerli, Tallahassee FL\n * * This software is distributed free of charge for non-commercial use * and is copyrighted. Of course, I do not guarantee that the software * works and am not responsible for any damage you may cause or have. * */ /* $Id: main.c 758 2007-07-03 14:12:40Z beerli $*/ #include "migration.h" #include "sighandler.h" #include "heating.h" #include "bayes.h" #include "world.h" #include "data.h" #include "options.h" #include "mcmc.h" #include "broyden.h" #include "combroyden.h" #include "menu.h" #include "random.h" #include "sequence.h" #include "tree.h" #include "tools.h" #include "profile.h" #include "aic.h" #include "lrt.h" #include "migrate_mpi.h" #include "migevents.h" #include "skyline.h" #include "reporter.h" #ifdef PRETTY #include "pretty.h" #endif /*PRETTY*/ #include #ifdef UEP #include "uep.h" #endif #ifdef DMALLOC_FUNC_CHECK #include #endif //use this if you have nice() and need to be nice with others #ifdef HAVE_NICE #include #endif #ifdef PRETTY #ifdef WIN32 extern void set_haru_handler(void); #endif #endif /* Definitions for MCMCMC -- heated chains * definitions of the first four heated chains, they are ordered from cold to hot */ #define EARTH universe[0] /*!< cold chain has always a temperature=1 */ #define VENUS universe[1] /*!< second coldest chain */ #define MERKUR universe[2] /*!< thrid coldest chain */ #define SUN universe[3] /*!< fourth coldest chain */ /* GLOBAL VARIABLES ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ int myID; /*!< myID=0 for single-cpu program and master in MPI program, worker nodes myID > 0 */ int myRepID; /*!< myID=0 for single-cpu program and master in MPI program, worker nodes myID > 0 */ int color; /*!< either 1 or MPI_UNDEFINED, in anticipation of more complicated MPI schemes */ int numcpu; /*!< used if MPI is running otherwise == 1 */ int locidone; /*!< used in MPI workers */ #ifdef MEMDEBUG #include FILE *memfile; struct timeval memt_start, memt_finish; double memelapsed; long totalsize; #endif #ifdef MPI filedb_fmt filedb[30]; long filenum; MPI_Comm comm_world; /*!< parallel environment that contains knowledge about all workers and master */ //MPI_Comm comm_workers; /*!< parallel environment that contains knowledge about all workers */ MPI_Group worker_group; MPI_Group world_group; #endif #ifdef SLOWNET int profiledone; /*!< used in MPI workers in profiles on slow networks */ #endif #ifdef PTHREADS tpool_t heating_pool; /*!< when compiled with PTHREADS then holds all threads */ #else #ifndef tpool_t #define tpool_t char #endif tpool_t heating_pool; #endif // random generator related global variables long *seed; /*!< contains the seed of the random number */ long *newseed; /*!< contains the new random seed */ char *generator; /*!< string that shows what random number generator is used */ // for bayesian output counter, initialized in bayes_init() long *mdimfilecount; #ifdef PRETTY // for pretty printing pdf_doc doc; pdf_page page; pdf_contents canvas; int page_counter; char pdf_pagetitle[LINESIZE+1]; float page_height; float page_width; #endif int setup_locus (long locus, world_fmt * world, option_fmt * options, data_fmt * data); void condense_time (world_fmt * world, long *step, long *j, MYREAL * accepted, long *G, long *steps, long oldsteps, long rep); void calculate_BF(world_fmt **universe, option_fmt *options); long set_repkind (option_fmt * options); void heating_init (world_fmt ** universe, int usize, data_fmt * data, option_fmt * options); void heating_prepare (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, long rep); void heating_prepare2 (world_fmt ** universe, int usize); long replicate_number (option_fmt * options, long chain, char type, long replicate); void combine_loci_standard (char type, option_fmt * options, world_fmt * world, long *Gmax); void combine_loci_mpi (char type, option_fmt * options, world_fmt * world, long *Gmax); void set_penalizer (long chain, long chains, char type, world_fmt ** universe); void run_sampler (option_fmt * options, data_fmt * data, world_fmt ** universe, int usize, long *outfilepos, long *Gmax); void run_replicate (long locus, long replicate, world_fmt ** universe, option_fmt * options, data_fmt * data, tpool_t * localheating_pool, int usize, long *treefilepos, long *Gmax); void run_locus (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, tpool_t * localheating_pool, long maxreplicate, long locus, long *treefilepos, long *Gmax); void run_loci (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, tpool_t * localheating_pool, long maxreplicate, long *treefilepos, long *Gmax); void run_one_update (world_fmt * world); void run_updates (world_fmt ** universe, int usize, option_fmt * options, tpool_t * localheating_pool, long inc, long increment, long step, long steps); void heated_swap (world_fmt ** universe, worldoption_fmt * options); void change_chaintype (long locus, char *type, world_fmt * world, long *increment, long *oldsteps, long *chains, option_fmt * options); void prepare_next_chain (world_fmt ** universe, worldoption_fmt * options, char type, long chain, long *chains, long *pluschain, long locus, long replicate); void print_bayesfactor(FILE *file, world_fmt **universe, option_fmt * options); void print_burnin_stop(FILE *file, world_fmt **universe, option_fmt * options); void print_heating_progress (world_fmt ** universe, worldoption_fmt * options, long stepinc); long analyze_olddata (world_fmt * world, option_fmt * options, data_fmt * data, long *outfilepos); void print_theta0(FILE *file, world_fmt *world, long maxreplicate); void profile_tables (option_fmt * options, world_fmt * world, long *gmaxptr); void finish_mac (option_fmt * options, data_fmt * data); int setup_locus (long locus, world_fmt * world, option_fmt * options, data_fmt * data); long set_repkind (option_fmt * options); void heating_prepare2 (world_fmt ** universe, int usize); long replicate_number (option_fmt * options, long chain, char type, long replicate); void print_heating_progress2 (FILE * file, worldoption_fmt * options, world_fmt ** universe); void get_bayeshist (world_fmt * world, option_fmt * options); void get_treedata (world_fmt * world, option_fmt * options); void get_mighistdata (world_fmt * world, option_fmt * options); void change_longsum_times (world_fmt * world); extern void unset_penalizer_function (boolean inprofiles); /// /// specifies the classes of override the menu as a parameter to the program enum override_enum { OVERRIDE_NO, OVERRIDE_MENU, OVERRIDE_NOMENU }; /// /// the program migrate calculates migration rates and population sizes /// from genetic data, it allows for a wide variety of data types and many options /// \callgraph int main (int argc, char **argv) { // for debug to stop at first statement of main() char c; int usemenu = OVERRIDE_NO; int argument; char type = 's'; data_fmt *data; #ifdef INTEGRATEDLIKE long maxreplicate; #endif int usize = 1; long Gmax = 0; long locus; long outfilepos = 0; option_fmt *options; world_fmt **universe; #ifdef MPI int server; #endif #ifdef HAVE_NICE nice (10); //nice value arbitrarily set to 10, 5 is still nice, 0 is standard #endif // puts(argv[0]); #ifdef PRETTY #ifdef WIN32 set_haru_handler(); #endif #endif #ifdef MPI // parallel version filenum = 0; //setting up the filename database so that worker node can write back to master MPI_Init (&argc, &argv); comm_world = MPI_COMM_WORLD; MPI_Comm_size (comm_world, &numcpu); MPI_Comm_rank (comm_world, &myID); MPI_Comm_group (comm_world, &world_group); server = MASTER; //server ID MPI_Group_excl (world_group, 1, &server, &worker_group); //MPI_Comm_create (comm_world, worker_group, &comm_workers); //fprintf(stderr,"MIGRATE 2.2 RUN (after comm create comm_workers)\n"); //if (myID != MASTER) // MPI_Comm_rank (comm_workers, &myRepID); // fprintf(stderr,"%i> MIGRATE 2.2 RUN\n",myID); // locus counter for workers, this should report the number of loci // each nodes really works on locidone = 0; //if(myID==0) // c=fgetc(stdin); //MYMPIBARRIER (comm_world); #ifdef SLOWNET // slow network parallel version profiledone = 0; which_calc_like (SINGLELOCUS); #endif #else /*MPI*/ //scalar version of migrate myID = MASTER; numcpu = 1; #endif /*MPI*/ #ifdef MEMDEBUG totalsize = 0 ; memfile = fopen("memoryfile","w+"); gettimeofday(&memt_start, NULL); #endif seed = (long *) mymalloc (sizeof (long) * 3); newseed = (long *) mymalloc (sizeof (long) * 3); generator = (char *) mycalloc (1,sizeof(char) * 80); universe = (world_fmt **) mycalloc (HEATED_CHAIN_NUM, sizeof (world_fmt *)); // try to catch and beautify some error messages signalhandling (ON); // create main data structures create_data (&data); create_world (&(EARTH), 1L); create_options (&options); // parmfile and menu init_options (options); if (myID == MASTER) { if (argc > 1) // user uses another parmfile // addition of an option to override the menu command in the parmfile // migrate-n -menu ==> use menu independent of the parmfile setting // migrate-n -nomenu ==> does use menu even if parmfile says so { argument = 1; while(argument < argc) { //fprintf(stderr,"<|%s|>\n",argv[argument]); //fflush(stderr); if(argv[argument][0]!='-') { strncpy (options->parmfilename, argv[argument], (long) strlen (argv[argument]) + 1); } else { if(strcmp(argv[argument],"-menu")==0) { usemenu = OVERRIDE_MENU; } else { if(strcmp(argv[argument],"-nomenu")==0) { usemenu = OVERRIDE_NOMENU; } } } argument++; } } read_options_master (options); switch(usemenu) { case OVERRIDE_MENU: options->menu = TRUE; break; case OVERRIDE_NOMENU: options->menu = FALSE; break; default: break; } print_menu_title (stdout, options); get_menu (options); if (options->bayes_infer) { options->schains = 0; options->lchains = 1; } #ifdef MPI MYMPIBARRIER (comm_world); broadcast_options_master (options); #endif } else { #ifdef MPI MYMPIBARRIER (comm_world); broadcast_options_worker (options); #endif //options->writelog = FALSE; options->menu = FALSE; } // initialization and data reading phase if(options->heating) { usize = MAX (1, options->heated_chains); } else { usize = 1; } universe = (world_fmt **) myrealloc (universe, usize * sizeof (world_fmt *)); init_files (EARTH, data, options); if (options->writelog && myID == MASTER) { print_menu_title (options->logfile, options); } EARTH->repkind = SINGLECHAIN; //fprintf(stderr,"myID=%i myRepID=%i\n",myID, myRepID); unset_penalizer_function (FALSE); // install penalizer for far jumps in MCMC // sampling phase if (!options->readsum) { //MCMC sampling run_sampler (options, data, universe, usize, &outfilepos, &Gmax); } else { //reanalyze sumfile Gmax = analyze_olddata (EARTH, options, data, &outfilepos); } unset_penalizer_function (TRUE); #ifdef MPI //with MPI:the workers stay here much longer than the Master combine_loci_mpi (type, options, EARTH, &Gmax); #else combine_loci_standard (type, options, EARTH, &Gmax); #endif if (options->bayes_infer) { get_bayeshist (EARTH, options); } else { // gather the treedata(1) into sumfile or from worker nodes get_treedata (EARTH, options); } // printing of results if (myID == MASTER) { if (options->bayes_infer) bayes_stat (EARTH); else { print_list (universe, options, data); #ifdef PRETTY pdf_print_results (&page_height, universe, options, data); // pdf_print_mcmctable(&page_height, page_width, EARTH, data, options); #endif if (EARTH->options->gamma) print_alpha_curve (EARTH, EARTH->atl, &Gmax); if(options->printcov) print_cov(EARTH, EARTH->numpop, EARTH->loci, EARTH->cov); fflush (EARTH->outfile); if (EARTH->options->lratio->counter > 0) print_lratio_test (EARTH, &Gmax); if (options->aic) akaike_information (EARTH, &Gmax); } #ifdef MPI get_mighistdata (EARTH, options); #endif if(options->skyline) print_expected_values(EARTH); if(options->mighist || options->skyline) { print_event_values(EARTH); } #ifdef UEP // print UEP probabilities if (options->uep) analyze_uep (EARTH); #endif // profile tables unset_penalizer_function (TRUE); //now we calculate profile and shut down // the penalizing of far jumps in the maximizer function } #ifdef SLOWNET // if(myID == MASTER) //{ //if there are many loci but only few parameters to estimate // this will be still slow and therefore saturating the network // with parameter requests may still be the better solution // for some - compile using make mpi instead make mpis which_calc_like (PROFILE); if (myID == MASTER) { // release the workers from the mpi_maximize_worker() function. // the workers will stop working on locus-likelihoods and advance to profiles mpi_send_stop (EARTH); } if (!options->bayes_infer) profile_tables (options, EARTH, &Gmax); //} #else /* SLOWNET*/ if (myID == MASTER) { #ifdef INTEGRATEDLIKE if(options->integrated_like) { maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; print_theta0(stdout, EARTH, maxreplicate); profile_tables (options, EARTH, &Gmax); } #else if (!options->bayes_infer) { profile_tables (options, EARTH, &Gmax); } #endif /*INTEGRATEDLIKE*/ } #endif /* SLOWNET */ #ifdef MPI if (myID == MASTER) { #endif if(EARTH->options->bayes_infer) { fprintf(EARTH->outfile,"MCMC run characteristics: Autocorrelation and Effective sample size\n"); fprintf(EARTH->outfile,"-------------------------------------------------------------------\n\n"); print_param_eff_size(EARTH->outfile,EARTH,EARTH->numpop2,2,EARTH->auto_archive, EARTH->ess_archive); #ifdef PRETTY pdf_bayes_print_ess(EARTH); #endif } #ifdef NEWVERSION print_bayesfactor(EARTH->outfile, universe,options); if(EARTH->options->burnin_autostop == 'a') { print_burnin_stop(EARTH->outfile, universe, options); } #endif /*NEWVERSION*/ if(EARTH->options->progress) { if(EARTH->options->bayes_infer) { fprintf(stdout,"\nMCMC run characteristics: Autocorrelation and Effective sample size\n"); fprintf(stdout,"-------------------------------------------------------------------\n\n"); print_param_eff_size(stdout,EARTH,EARTH->numpop2,2,EARTH->auto_archive, EARTH->ess_archive); #ifdef NEWVERSION print_bayesfactor(stdout, universe,options); #endif /*NEWVERSION*/ } } if(EARTH->options->treeinmemory) { for(locus=0;locusloci; locus++) { fprintf(EARTH->treefile,"%s", EARTH->treespace[locus]); } } #ifdef PRETTY pdf_print_end_time(&page_height); // write out to PDF file pdf_write_file(options); #endif print_finish (EARTH, outfilepos); // closing all files #ifdef MAC finish_mac (options, data); #endif exit_files (EARTH, data, options); #ifdef MPI # ifndef SLOWNET mpi_send_stop (EARTH); //stop workers // printf("%i> at barrier in slownet-main before finalize\n", myID); # endif } MYMPIBARRIER (comm_world); MPI_Finalize (); #endif /*MPI*/ myfree(seed); myfree(newseed); myfree(generator); free_universe(universe, usize, options); destroy_data(data); destroy_options(options); // for debugging with MallocDebug on macosx // while(TRUE) // { // sleep(1); //} //end for debugging return 0; } /* main end */ /// /// Calculates the maximum likelihood estimates from the trees that were gathered for each locus /// \callgraph void combine_loci_standard (char type, option_fmt * options, world_fmt * world, long *Gmax) { long kind = MULTILOCUS; if (options->readsum) { if (world->loci == 1) kind = SINGLELOCUS; } #ifndef INTEGRATEDLIKE if ((world->loci - world->skipped > 1) || (options->readsum)) #else if ((world->loci - world->skipped > 1) || (options->readsum)) #endif { if (options->gamma) world->param0[world->numpop2] = world->options->alphavalue; world->repkind = set_repkind (options); decide_plot (world->options, world->options->lchains, world->options->lchains, 'l'); #ifdef LONGSUM change_longsum_times (world); //multilocus #endif /*LONGSUM*/ #ifdef INTEGRATEDLIKE (void) estimateParameter (options->replicatenum, *Gmax, world, options,\ world->cov[world->loci], 0 /* chain */ ,\ type, kind, world->repkind); #else if(!options->bayes_infer) { (void) estimateParameter (options->replicatenum, *Gmax, world, options, world->cov[world->loci], 0 /* chain */ , type, kind, world->repkind); } #endif } } /// /// Calculates the maximum likelihood estimates from the trees that were gathered for each locus /// \callgraph void combine_loci_mpi (char type, option_fmt * options, world_fmt * world, long *Gmax) { #ifdef MPI long kind = MULTILOCUS; if (options->readsum) { if (world->loci == 1) kind = SINGLELOCUS; } if ((world->loci - world->skipped > 0) || (options->readsum)) { if (options->gamma) world->param0[world->numpop2] = world->options->alphavalue; world->repkind = set_repkind (options); if (myID == MASTER) { if(!options->bayes_infer) { mpi_gmax_master (world, Gmax); mpi_startparam_master (world); decide_plot (world->options, world->options->lchains, world->options->lchains, 'l'); } #ifdef LONGSUM change_longsum_times (world); //multilocus #endif /*LONGSUM*/ // broadcast Gmax to all workers, the worker pendant is in mpi_gmax_worker if (!options->bayes_infer) { MYMPIBCAST (Gmax, 1, MPI_LONG, MASTER, comm_world); (void) estimateParameter (options->replicatenum, *Gmax, world, options, world->cov[world->loci], 0 /* chain */ , type, kind, world->repkind); } } else { // printf("%i> before gmax worker\n", myID); if(!options->bayes_infer) { mpi_gmax_worker (world); mpi_startparam_worker (world); } // printf("%i> start worker postlike calculations\n", myID); mpi_maximize_worker (world, MULTILOCUS, options->replicatenum); // receive broadcast for Gmax } } // else // { // if (myID != MASTER) // { // maxreplicate = (options->replicate // && options->replicatenum > // 0) ? options->replicatenum : 1; // fprintf(stdout,"about to pack result buffer and send too"); // mpi_results_worker(MIGMPI_SUMFILE, world, maxreplicate, pack_result_buffer); // //mpi_maximize_worker(world, options->replicatenum); // } // } #endif } /// /// runs the MCMC sampling void run_sampler (option_fmt * options, data_fmt * data, world_fmt ** universe, int usize, long *outfilepos, long *Gmax) { long i; long maxreplicate; long treefilepos; #ifdef PRETTY float left_margin; #endif /*PRETTY*/ #ifdef MPI //#ifdef MPIREPLICANT // MPI_Status status; // contains status used in MPI_Recv MPI_Request *irequests; // contains requests generated in MPI_Isend MPI_Status *istatus; // conatins stata from MPI_Wait_any long minnodes; // long sent; // counter for the array irequests of the MPI_Isend requests //#endif long *twolongs; // message sent to workers contains locus and replicate number //int *ranks; //MPI_Group worldgroup, locigroup; if (myID < data->loci + 1) color = 1; else color = MPI_UNDEFINED; twolongs = (long *) mycalloc (TWO, sizeof (long)); #endif #ifdef MPI if (myID == MASTER) { #endif get_data (data->infile, data, options); #ifdef MPI // printf("%i > finished data reading\n", myID); broadcast_data_master (data, options); if (numcpu == 1) { error ("This program was compiled to use a parallel computer\n and you tried to run it on only a single node.\nThis will not work because it uses a \n\"single_master-many_worker\" architecture \nand needs at least TWO nodes\n"); } } else { broadcast_data_worker (data, options); } //MPI_Comm_dup(comm_world, &comm_all_worlds); //MPI_Comm_rank(comm_world, &myID); #endif set_plot (options); EARTH->cold = TRUE; // this is the cold chain when we use heating, the hotter chains have FALSE init_world (EARTH, data, options); set_meanmu(EARTH,options); #ifdef PTHREADS tpool_init (&heating_pool, usize, usize, 0); #endif if (options->heating) //first in universe is the cold chain[EARTH] heating_init (universe, usize, data, options); getseed (options); //#ifdef MPI // MYMPIBARRIER(comm_world); //#endif /* calculate FST values */ #ifdef MPI if(myID!=MASTER) { if(options->printfst || options->thetaguess==FST || options->migrguess==FST) { calc_simple_param (EARTH, data); } } #endif /* report to screen */ if (myID == MASTER) { print_menu_options (EARTH, options, data); if (options->progress) print_data_summary (stdout, EARTH, options, data); /* print to outfile */ #ifdef PRETTY //////////////////////////////////// pdf_init(); // add first page pdf_new_page(options->title); pdf_master_title(options->title, &page_height, &left_margin); pdf_print_options(EARTH, options, data, &page_height, &left_margin); pdf_print_data_summary(EARTH, options, data, &page_height, &left_margin); pdf_print_data (EARTH, options, data, &page_height, &left_margin); //////////////////////////////////// #endif *outfilepos = print_title (EARTH, options); print_options (EARTH->outfile, EARTH, options, data); print_data_summary (EARTH->outfile, EARTH, options, data); print_data (EARTH, options, data); print_spectra (EARTH, options, data); if(options->bayes_infer && myID == MASTER) { if(options->has_bayesmdimfile) print_bayes_mdimfileheader(EARTH->bayesmdimfile, options->bayesmdiminterval, EARTH); } } if (options->lchains < 2 && options->replicatenum == 0) options->replicate = 0; maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; #ifdef MPI minnodes = MAX (0, numcpu - data->loci - 1); irequests = (MPI_Request *) mycalloc (minnodes + 1, sizeof (MPI_Request)); istatus = (MPI_Status *) mycalloc (minnodes + 1, sizeof (MPI_Status)); if (myID == MASTER) { mpi_runloci_master (data->loci, EARTH->who, EARTH, options->readsum); #ifndef MPIREPLICANT sent = mpi_send_stop_mcmc_worker (numcpu, data->loci, &comm_world, irequests,istatus, myID); #endif } else { mpi_runloci_worker (universe, usize, options, data, &heating_pool, maxreplicate, &treefilepos, Gmax); #ifdef MPIREPLICANT // the first worker will always be here waiting //if (myID == FIRSTWORKER) //{ // MYMPIRECV (twolongs, TWO, MPI_LONG, MASTER, 0, comm_world, &status); #ifdef MPI_DEBUG // fprintf(stdout,"%i> FIRSTWORKER kill off now non-loci-worker\n",myID); #endif // sent = mpi_send_stop_mcmc_replicateworker(numcpu,data->loci); // mpi_send_stop_mcmc_worker (numcpu, data->loci, &comm_workers, // irequests, istatus, myID); // } #endif /*MPIREPLICANT*/ } myfree(istatus); myfree(irequests); #else /*MPI*/ run_loci (universe, usize, options, data, &heating_pool, maxreplicate, &treefilepos, Gmax); #endif /*MPI*/ #ifdef PTHREADS tpool_destroy (heating_pool, 1); #endif #ifdef MPI if (myID != MASTER) { #endif if (options->heating) { for (i = 0; i < options->heated_chains; i++) { //free_tree(universe[i]->root, universe[i]); free_timevector (universe[i]->treetimes); } } else { //free_tree(EARTH->root, EARTH); free_timevector (EARTH->treetimes); } #ifdef MPI } myfree(twolongs); #endif } /// generate samples of genealogies for all loci void run_loci (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, tpool_t * localheating_pool, long maxreplicate, long *treefilepos, long *Gmax) { long locus; // long i; for (locus = 0; locus < data->loci; locus++) { run_locus (universe, usize, options, data, localheating_pool, maxreplicate, locus, treefilepos, Gmax); free_datapart(data,options,locus); } } /// save all genealogy summaries void get_treedata (world_fmt * world, option_fmt * options) { #ifdef MPI long maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; #endif if (myID == MASTER) { if (options->writesum) { #ifdef MPI //get all sumfile data from the workers using unpack_sumfile_buffer() mpi_results_master (MIGMPI_SUMFILE, world, maxreplicate, unpack_sumfile_buffer); #endif write_savesum (world); } #ifdef SLOWNET else { // printf("%i> about to receive the sumfile data\n", myID); mpi_results_master (MIGMPI_SUMFILE, world, maxreplicate, unpack_sumfile_buffer); // printf("%i> all sumfile data received and unpacked\n", myID); } #endif } } /// /// save all genealogy summaries void get_bayeshist (world_fmt * world, option_fmt * options) { #ifdef MPI long maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; if (myID == MASTER) { mpi_results_master (MIGMPI_BAYESHIST, world, maxreplicate, unpack_bayes_buffer); } #endif } /// get migration event time data /// and skyline data when present void get_mighistdata (world_fmt * world, option_fmt * options) { #ifdef MPI long maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; if (myID == MASTER) { if (options->mighist) { //get all mighist data from the workers using unpack_mighist_buffer() mpi_results_master (MIGMPI_MIGHIST, world, maxreplicate, unpack_mighist_buffer); if(options->skyline) { //get all mighist data from the workers using unpack_mighist_buffer() fprintf(stdout,"%i> before unpack skyline\n",myID); mpi_results_master (MIGMPI_SKYLINE, world, maxreplicate, unpack_skyline_buffer); } } if(world->options->treeprint ==BEST && world->options->treeinmemory==TRUE) { mpi_results_master (MIGMPI_TREESPACE, world, maxreplicate, unpack_treespace_buffer); } } #endif } /// swap the tree pointer between chains with different temperatures void heated_swap (world_fmt ** universe, worldoption_fmt * options) { long sm = 0, mv = 0, vw = 0; long r; switch (r = RANDINT (0, options->heated_chains - 2)) { case 2: sm = chance_swap_tree (SUN, MERKUR); MERKUR->swapped += sm; break; case 1: VENUS->swapped += (mv = chance_swap_tree (MERKUR, VENUS)); break; case 0: EARTH->swapped += (vw = chance_swap_tree (VENUS, EARTH)); break; default: universe[r]->swapped += chance_swap_tree (universe[r + 1], universe[r]); } } /// change the type of the chain form short to long and reset the treelist void change_chaintype (long locus, char *type, world_fmt * world, long *increment, long *oldsteps, long *chains, option_fmt * options) { if (*type == 's') { *type = 'l'; create_treetimelist (world, &(world->treetimes), locus); set_bounds (increment, oldsteps, chains, options, *type); world->increment = *increment; } } /// prepare the next chain void prepare_next_chain (world_fmt ** universe, worldoption_fmt * options, char type, long chain, long *chains, long *pluschain, long locus, long replicate) { long i; EARTH->likelihood[0] = EARTH->likelihood[EARTH->G]; if (options->heating) { for (i = 1; i < options->heated_chains; i++) universe[i]->likelihood[0] = universe[i]->likelihood[universe[i]->G]; } EARTH->treetimes[0].copies = 0; if (type == 'l') { if (options->replicate && options->replicatenum > 0) EARTH->chainlikes[locus][replicate] = EARTH->param_like; else EARTH->chainlikes[locus][chain] = EARTH->param_like; } EARTH->start = FALSE; if (type == 'l') { if (chain < *chains + *pluschain) { if (((EARTH->param_like > options->lcepsilon) || (options->gelman && EARTH->convergence->gelmanmaxRall > GELMAN_MYSTIC_VALUE)) && !(options->replicate && options->replicatenum == 0)) { (*chains)++; (*pluschain)--; } } } } /// /// resetting of accept and accept_freq for heated chains void reset_heated_accept(world_fmt **universe, long unum) { long i; for(i=0; i < unum; i++) { universe[i]->swapped = 0; universe[i]->accept = 0L; universe[i]->accept_freq = 0.; } } /// print progress about heated chains void print_heating_progress (world_fmt ** universe, worldoption_fmt * options, long stepinc) { MYREAL fstepinc = (MYREAL) stepinc; if (options->heating) { #ifdef DEBUG_MPI printf("%i> stepinc=%li =========\n",myID, stepinc); #endif universe[0]->accept_freq /= fstepinc; universe[1]->accept_freq /= fstepinc; universe[2]->accept_freq /= fstepinc; universe[3]->accept_freq /= fstepinc; if (options->progress) print_heating_progress2 (stdout, options, universe); if (options->writelog) print_heating_progress2 (options->logfile, options, universe); } } /// /// sub-function of print_heating, in MPI mode, the string printing will minimize data transfer. void print_heating_progress2 (FILE * file, worldoption_fmt * options, world_fmt ** universe) { char *plog; long plogsize = 0L; plog = (char *) mycalloc(LONGLINESIZE,sizeof(char)); plogsize = sprintf(plog, " Swapping between %li temperatures.\n", options->heated_chains); if (options->heated_chains > 4) plogsize += sprintf (plog + plogsize, " Only the 4 coldest chains are shown\n"); plogsize += sprintf (plog + plogsize," Temperature | Accepted | Swaps between temperatures\n"); if (options->heated_chains > 4) plogsize += sprintf (plog + plogsize, " %10.4g | %4.2f | %5li||\n", universe[3]->averageheat, universe[3]->accept_freq, universe[3]->swapped); else plogsize += sprintf (plog + plogsize, " %10.4g | %4.2f | |\n", universe[3]->averageheat, universe[3]->accept_freq); plogsize += sprintf (plog + plogsize, " %10.4f | %4.2f | %5li ||\n", universe[2]->averageheat, universe[2]->accept_freq, universe[2]->swapped); plogsize += sprintf (plog + plogsize," %10.4f | %4.2f | %5li ||\n", universe[1]->averageheat, universe[1]->accept_freq, universe[1]->swapped); plogsize += sprintf (plog + plogsize," %10.4f | %4.2f | %5li |\n", universe[0]->averageheat, universe[0]->accept_freq, universe[0]->swapped); FPRINTF(file,"%s",plog); myfree(plog); } /// analyze old sumfile data long analyze_olddata (world_fmt * world, option_fmt * options, data_fmt * data, long *outfilepos) { #ifndef MPI long locus; #else long listofthings[6]; #endif #ifdef PRETTY float left_margin; #endif long Gmax = 0; long pop; options->heating = FALSE; options->gelman = FALSE; if (myID == MASTER) { unset_penalizer_function (TRUE); Gmax = read_savesum (world, options, data); data->popnames = (char **) mymalloc (sizeof (char *) * world->numpop); for (pop = 0; pop < world->numpop; pop++) { data->popnames[pop] = (char *) mycalloc (1, sizeof (char) * LINESIZE); MYSNPRINTF (data->popnames[pop], LINESIZE, "Pop_%li ", pop + 1); } #ifdef MPI MYMPIBCAST (data->geo, world->numpop2, MPI_DOUBLE, MASTER, comm_world); MYMPIBCAST (data->lgeo, world->numpop2, MPI_DOUBLE, MASTER, comm_world); } else { MYMPIBCAST (listofthings, 6, MPI_LONG, MASTER, comm_world); // printf("listofthings received\n"); // fflush(stdout); world->loci = listofthings[0]; world->numpop = listofthings[1]; world->numpop2 = listofthings[2]; options->replicate = (boolean) listofthings[3]; options->replicatenum = listofthings[4]; Gmax = listofthings[5]; data->numpop = world->numpop; data->geo = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop2); data->lgeo = (MYREAL *) mycalloc (1, sizeof (MYREAL) * world->numpop2); MYMPIBCAST (data->geo, world->numpop2, MPI_DOUBLE, MASTER, comm_world); MYMPIBCAST (data->lgeo, world->numpop2, MPI_DOUBLE, MASTER, comm_world); options->muloci = data->loci = world->loci; init_world (world, data, options); #endif } #ifdef MPI #ifdef SLOWNET mpi_broadcast_results (world, world->loci, pack_sumfile_buffer, unpack_sumfile_buffer); #endif #endif synchronize_param (world, options); if (options->plot) { options->plotnow = TRUE; world->options->plotnow = TRUE; } world->locus = 0; if (myID == MASTER) { *outfilepos = print_title (world, options); print_options (stdout, world, options, data); print_options (world->outfile, world, options, data); #ifdef PRETTY //////////////////////////////////// pdf_init(); // add first page pdf_new_page(options->title); pdf_master_title(options->title, &page_height, &left_margin); page_height -= 20; // option title should not straddle the line pdf_print_options(world, options, data, &page_height, &left_margin); //////////////////////////////////// #endif #ifdef MPI MYMPIBARRIER(comm_world); printf("%i> before mpi_runlocimaster\n",myID);fflush(stdout); mpi_runloci_master (world->loci, world->who, world, options->readsum); #else for (locus = 0; locus < world->loci; locus++) { if (options->replicate && options->replicatenum > 0) { world->locus = locus; world->repkind = MULTIPLERUN; #ifdef LONGSUM change_longsum_times (world); //multi run replicates #endif /*LONGSUM*/ if (!options->bayes_infer) { (void) estimateParameter (options->replicatenum, Gmax, world, options, world->cov[locus], 1, 'l', SINGLELOCUS, world->repkind); } } } #endif } #ifdef MPI else { MYMPIBARRIER(comm_world); printf("%i> before assignloci_worker\n",myID);fflush(stdout); assignloci_worker (world, options, &Gmax); // some workers might still wait in assignloci_worker(), will be stopped through the master } #endif return Gmax; } /*analyze_olddata()*/ /// generate profile likelihood tables void profile_tables (option_fmt * options, world_fmt * world, long *gmaxptr) { long len; #ifndef SLOWNET #ifdef PRETTY long location = 0; #endif long i; #endif if (options->profile != NONE) { world->buffer = (char *) myrealloc (world->buffer, SUPERLINESIZE * sizeof (char)); if (options->printprofsummary) allocate_profile_percentiles (world); #ifdef HAVE_STRFTIME time (&world->starttime); #endif len = world->numpop2 + (world->options->gamma ? 1 : 0); #ifdef LONGSUM len += world->numpop * 3; #endif /*LONGSUM*/ #ifdef SLOWNET if (!options->readsum) mpi_broadcast_results (world, world->loci, pack_sumfile_buffer, unpack_sumfile_buffer); mpi_broadcast_results (world, world->loci + ((world->loci == 1) ? 0 : 1), pack_result_buffer, unpack_result_buffer); //fprintf(stdout,"%i >>>>>>>>> wait for all at barrier in profiles_tables [main.c:1053]",myID); MYMPIBARRIER (comm_world); if (myID == MASTER) { print_profile_title (world); #ifdef PRETTY pdf_print_profile_title(world); #endif mpi_profiles_master (world, len, world->profilewho); } else mpi_profiles_worker (world, gmaxptr); #else /*SLOWNET -not*/ if(myID==MASTER && (options->profile == TABLES || options->profile == ALL)) { print_profile_title (world); #ifdef PRETTY pdf_print_profile_title(world); #endif } for (i = 0; i < len; i++) { (void) print_profile_likelihood_driver (i, world, gmaxptr); if(options->profile == TABLES || options->profile == ALL) { FPRINTF (world->outfile, "%s\n\n", world->buffer); //ascii_print_profile_table(options->profilemethod, world); #ifdef PRETTY location = strlen(world->buffer); pdf_print_profile_table(55., &page_height, options->profilemethod, world->buffer + location + 1, world); #endif } world->buffer[0] = '\0'; } #endif /*SLOWNET end of not*/ if (myID == MASTER && options->printprofsummary) { print_profile_percentile (world); // print profile summary FPRINTF (world->outfile, "%s\n\n", world->buffer); #ifdef PRETTY pdf_print_profile_percentile (world); #endif world->buffer[0] = '\0'; destroy_profile_percentiles (world); } } //myfree(world->buffer); } /// finish up mac files \todo remove this function void finish_mac (option_fmt * options, data_fmt * data) { #ifdef MAC #if 0 fixmacfile (options->outfilename); if (options->plotmethod == PLOTALL) fixmacfile (options->mathfilename); if (options->treeprint) fixmacfile (options->treefilename); if (options->parmfile) fixmacfile ("parmfile"); if (data->sumfile) fixmacfile (options->sumfilename); #endif #endif } /// \brief setup a locus /// /// Set up a the structures for a locus. Run-Parameters are copied into the /// major structure WORLD from options and data. a first genealogy is build /// adjusted to the starting parameters (changing times of nodes) and a first /// conditional likelihood is calculated for all nodes, also the tree-parallele /// time structure is generated. /// Several run controllers are checked: replicate, datatype /// \retvalue 0 { failed to setup structure for locus because there is no data } /// \retvalue 1 { succeeded to setup locus-structure } int setup_locus (long locus, world_fmt * world, option_fmt * options, data_fmt * data) { world->locus = locus; set_param (world, data, options, locus); if (world->options->progress) print_menu_locus (stdout, world, locus); if (world->options->writelog) print_menu_locus (world->options->logfile, world, locus); world->start = TRUE; buildtree (world, options, data, locus); if (world->replicate == 0) { if (strchr (SNPTYPES, options->datatype)) { if (options->datatype == 'u') world->data->seq->endsite *= (data->seq->addon + 1); world->data->seq->endsite += data->seq->addon; } } // if (!options->replicate // || (options->replicate && options->replicatenum == 0) // || (options->replicate && world->replicate == options->replicatenum)) //{ if (data->skiploci[locus]) /* skip loci with no tips */ return 0; //} create_treetimelist (world, &world->treetimes, locus); fix_times (world, options); first_smooth (world, locus); world->likelihood[0] = treelikelihood (world); world->allikemax = -DBL_MAX; /* for best tree option */ #ifdef UEP if (options->uep) { world->treelen = 0.0; if (options->ueprate > 0.0) calc_treelength (world->root->next->back, &world->treelen); //world->ueplikelihood = ueplikelihood(world); //world->likelihood[0] += world->ueplikelihood; } #endif return 1; } /// condense a single genealogy into a sufficient statistic for the maximization phase void condense_time (world_fmt * world, long *step, long *j, MYREAL * accepted, long *G, long *steps, long oldsteps, long rep) { static long n = 0; #ifdef INTEGRATEDLIKE long i; long z; long nn = world->numpop2 + world->bayes->mu; MYREAL x; MYREAL delta; #endif world->rep = rep; if(world->options->bayes_infer) { if (world->in_last_chain) { // syncs treeupdate and paramupdate if(world->bayesaccept == -1) { world->bayes->oldval = probg_treetimes(world); } bayes_save (world, *step * world->options->lincr); store_events(world, world->treetimes, world->numpop, rep); *accepted += *j; #ifdef INTEGRATEDLIKE n += 1; for(i=0; i < nn; i++) { if(world->bayes->map[i][1] == INVALID) continue; else { z = world->bayes->map[i][1]; } x = world->param0[z]; delta = x - world->atl[rep][world->locus].param0[z]; world->atl[rep][world->locus].param0[z] += delta/n; } #else return; #endif } else { warning("Bayesian analysis: do not run multiple long chains, run 1 long chain! [nothing will be wrong but it is a waste of time]"); return; } } if (*step == 0) { copy_time (world, world->treetimes, FIRSTSTEP, 0, world->numpop, rep); *accepted += *j; //for INTEGRATEDLIKE n = 0; } else { copy_time (world, world->treetimes, *G, *G + (long) (*j > 0), world->numpop, rep); if (*step < *steps) { //handled in advance_world * G += (long) (*j > 0); *accepted += *j; } } if (*step >= oldsteps - 1 && world->options->movingsteps) { if (((MYREAL) (*G + 1)) < world->options->acceptfreq * oldsteps) { (*steps)++; } } } void print_theta0(FILE *file, world_fmt *world, long maxreplicate) { long i; long z; long locus; long rep; long nn = world->numpop2; fprintf(file,"Locus Replicate Parameters\n"); fprintf(file,"-----------------------------------------------------------\n"); for(locus=0; locus < world->loci; locus++) { for(rep = 0; rep < maxreplicate; rep++) { fprintf(file,"%5li %5li ",locus, rep+1); for(i=0; i < nn; i++) { if(world->bayes->map[i][1] == INVALID) continue; else { z = world->bayes->map[i][1]; } fprintf(file,"%f ", world->atl[rep][world->locus].param0[z]); } fprintf(file,"\n"); } } fprintf(file,"\n\n"); } /// set type of replication scheme long set_repkind (option_fmt * options) { if (options->replicate) { if (options->replicatenum == 0) return MULTIPLECHAIN; else return MULTIPLERUN; } return SINGLECHAIN; } /// intialize heating scheme void heating_init (world_fmt ** universe, int usize, data_fmt * data, option_fmt * options) { long chain; char tmp[20]; for (chain = 1; chain < usize; ++chain) { MYSNPRINTF (tmp, 20, "%10.5f", options->heat[chain]); create_world (&universe[chain], data->loci); universe[chain]->cold = FALSE; init_world (universe[chain], data, options); } } /// prepare for heating scheme: Step I void heating_prepare (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, long rep) { long chain; EARTH->heat = options->heat[0]; for (chain = 1; chain < usize; ++chain) klone (EARTH, universe[chain], options, data, options->heat[chain]); } /// prepare for heating scheme: Step I [this function failed in the parallel run, unused for now, replaced by above] void heating_prepare_old (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, long rep) { long chain; EARTH->heat = options->heat[0]; if (rep == 0) { for (chain = 1; chain < usize; ++chain) klone (EARTH, universe[chain], options, data, options->heat[chain]); } else { for (chain = 1; chain < usize; ++chain) klone_part (EARTH, universe[chain], options, data, options->heat[chain]); } } /// prepare for heating scheme: Step II void heating_prepare2 (world_fmt ** universe, int usize) { long chain; universe[0]->averageheat = 1.0; for (chain = 1; chain < usize; ++chain) { universe[chain]->G = 0; universe[chain]->averageheat = universe[chain]->options->adaptiveheat ? 0.0 : 1. / universe[chain]->heat; } for (chain = 1; chain < usize; ++chain) { clone_polish (EARTH, universe[chain]); } } /// \brief generates replicate number /// /// generates replicate number /// \rtval 0 no replicate /// \rtval rep number of rpelicates long replicate_number (option_fmt * options, long chain, char type, long replicate) { long rep = 0; if (options->replicate && options->replicatenum > 0) rep = replicate; else { if (type == 'l' && options->replicate) rep = chain; else rep = 0; } return rep; } /// generate genealogies for a single locus /// \callgraph void run_locus (world_fmt ** universe, int usize, option_fmt * options, data_fmt * data, tpool_t * localheating_pool, long maxreplicate, long locus, long *treefilepos, long *Gmax) { long i; long convergence_len = universe[0]->numpop2 + 3 * universe[0]->numpop; long replicate; if(options->bayes_infer) convergence_len = universe[0]->numpop2 + 1; memset(EARTH->convergence->chain_means,0,maxreplicate * convergence_len * sizeof(MYREAL)); memset(EARTH->convergence->chain_s,0,maxreplicate * convergence_len * sizeof(MYREAL)); memset(EARTH->convergence->gelmanmeanmaxR,0,maxreplicate * maxreplicate * sizeof(MYREAL)); for (replicate = 0; replicate < maxreplicate; replicate++) { run_replicate (locus, replicate, universe, options, data, localheating_pool, usize, treefilepos, Gmax); if(EARTH->cold && EARTH->options->replicatenum > 0) { if(options->gelman) { chain_means(&EARTH->convergence->chain_means[replicate * convergence_len], EARTH); chain_s(EARTH->convergence->chain_s, EARTH->convergence->chain_means, EARTH, replicate); if (replicate > 0) { convergence_check_bayes(EARTH, maxreplicate); convergence_progress(stdout, EARTH); } } } } #ifdef UEP if (options->uep) show_uep_store (EARTH); #endif if (options->replicate && options->replicatenum > 0) { EARTH->repkind = MULTIPLERUN; if (options->bayes_infer) { calculate_credibility_interval (EARTH, locus); // return; } #ifdef LONGSUM change_longsum_times (EARTH); #endif /*LONGSUM*/ // over multiple replicates if present or single locus #ifdef INTEGRATEDLIKE (void) estimateParameter (options->replicatenum, *Gmax, EARTH, options, EARTH->cov[locus], options->lchains, /*type */ 'l', SINGLELOCUS, EARTH->repkind); #else if (!options->bayes_infer) { (void) estimateParameter (options->replicatenum, *Gmax, EARTH, options, EARTH->cov[locus], options->lchains, /*type */ 'l', SINGLELOCUS, EARTH->repkind); } #endif } else { if (options->bayes_infer) { calculate_credibility_interval (EARTH, locus); } } if (options->heating) { for (i = 0; i < options->heated_chains; i++) { free_tree (universe[i]->root, universe[i]); // free_timevector(universe[i]->treetimes); myfree(universe[i]->nodep); } } else { free_tree (EARTH->root, EARTH); myfree(EARTH->nodep); // free_timevector(EARTH->treetimes); } if(options->bayes_infer) bayes_reset (universe[0]); } /// \brief updates the tree and records acceptance /// /// updates the tree and records acceptance /// when in bayesian mode change between changing the tree and the parameters void run_one_update (world_fmt * world) { long count = 0; long np = world->numpop2; if (world->options->bayes_infer) { if(world->bayes->mu) { np += 1; } world->bayesaccept = bayes_update (world); if (world->bayesaccept == -1) //either do tree updates or parameter updates { world->accept += (count = tree_update (world, world->G)); world->bayes->accept[np] += count; //permanent recorder for the tree acceptance, // for some obscure reason the world->accept gets lost, most likely every cycle; world->bayes->trials[np] += 1; // counter for trees tried } world->param_like = world->bayes->oldval; } else { world->accept += tree_update (world, world->G); } } /// \brief updates all trees, controls updates and heating /// /// controls updates and temperatures (threaded or unthreaded) /// updates the tree and/or parameters /// \callgraph void run_updates (world_fmt ** universe, int usize, option_fmt * options, tpool_t * localheating_pool, long inc, long increment, long step, long steps) { #ifndef PTHREADS long ii; #endif if (options->heating) { #ifdef PTHREADS /*using threads and running on an SMP machine */ fill_tpool (*localheating_pool, universe, usize); tpool_synchronize (*localheating_pool, usize); #else /*heating but not using threads */ for (ii = 0; ii < options->heated_chains; ii++) { run_one_update (universe[ii]); } #endif /* end of not-using threads */ if(options->bayes_infer && RANDUM() > options->updateratio) { #ifdef UEP if (options->uep && EARTH->in_last_chain) update_uepanc (EARTH); #endif return; } else { heated_swap (universe, EARTH->options); if (options->adaptiveheat) adjust_temperatures (universe, options->heated_chains, inc /*rement */ + step * increment, steps * increment); } } else /* no heating */ { run_one_update (EARTH); } #ifdef UEP if (options->uep && EARTH->in_last_chain) update_uepanc (EARTH); #endif } /// generates trees over the interval increments void run_increments (world_fmt ** universe, long usize, option_fmt * options, tpool_t * localheating_pool, long increment, long step, long steps) { long i; for (i = 0; i < increment; i++) { EARTH->actualinc = i; run_updates (universe, usize, options, localheating_pool, i, increment, step, steps); if (EARTH->likelihood[EARTH->G] > EARTH->maxdatallike) EARTH->maxdatallike = EARTH->likelihood[EARTH->G]; } } /// \brief run a chain /// run a chain void run_steps (world_fmt ** universe, long usize, option_fmt * options, tpool_t * localheating_pool, long increment, long steps, long rep) { long step; long oldsteps = steps; long ii; MYREAL var=0.0; if(EARTH->options->bayes_infer) single_chain_var (NULL, step, &var, NULL, NULL); for (step = 0; step < steps + 1; step++) { EARTH->increment = increment; run_increments (universe, usize, options, localheating_pool, increment, step, steps); #ifdef UEP store_uep (EARTH); #endif // printf("step=%li inc=%li locus=%li\n",step,increment,EARTH->locus); condense_time (EARTH, &step, &EARTH->accept, &EARTH->accept_freq, &EARTH->G, &steps, oldsteps, rep); calculate_BF(universe,options); if(EARTH->options->bayes_infer) single_chain_var (EARTH, step, &var, EARTH->autocorrelation, EARTH->effective_sample); if (step > 0) { advance_clone_like (EARTH, EARTH->accept, &EARTH->G); EARTH->accept = 0L; } // if we do heating if (EARTH->options->heating) { for (ii = 1; ii < EARTH->options->heated_chains; ++ii) { universe[ii]->accept_freq += universe[ii]->accept; advance_clone_like (universe[ii], universe[ii]->accept, &(universe[ii])->G); universe[ii]->accept = 0L; } } } } /// run all chains for short and then for long void run_chains (world_fmt ** universe, long usize, option_fmt * options, tpool_t * localheating_pool, long replicate, long chains, char type, long increment, long oldsteps, long *treefilepos, long *Gmax) { long i; long steps; long chain; long rep; const long locus = EARTH->locus; long kind; long pluschain = 0; const MYREAL treeupdateratio = (options->bayes_infer ? options->updateratio : 1.0); for (chain = 0; chain < chains || (type == 'l' && chain >= chains && EARTH->param_like > options->lcepsilon); chain++) { if (options->heating) MERKUR->swapped = VENUS->swapped = EARTH->swapped = 0; rep = replicate_number (options, chain, type, replicate); precalc_world (EARTH); polish_world (EARTH); burnin_chain (EARTH); EARTH->G = 0; EARTH->accept_freq = 0.; EARTH->accept = 0; steps = oldsteps; EARTH->maxdatallike = EARTH->likelihood[0]; if (options->heating) { heating_prepare2 (universe, usize); for(i=1; ioptions->heated_chains; i++) { burnin_chain (universe[i]); } reset_heated_accept(universe,EARTH->options->heated_chains); } set_penalizer (chain, chains, type, universe); // run steps, run increments, run updaters run_steps (universe, usize, options, localheating_pool, increment, steps, rep); decide_plot (EARTH->options, chain, chains, type); // prepare for parameter estimation, precompute and copy memcpy (EARTH->param00, EARTH->param0, sizeof (MYREAL) * EARTH->numpop2); #ifndef INTEGRATELIKE if(!options->bayes_infer) { memcpy (EARTH->atl[rep][locus].param0, EARTH->param00, sizeof (MYREAL) * EARTH->numpop2); log_param0 (EARTH->atl[rep][locus].param0, EARTH->atl[rep][locus].lparam0, EARTH->numpop2); copies2lcopies (&EARTH->atl[rep][locus]); } #else // memcpy (EARTH->atl[rep][locus].param0, EARTH->param00, // sizeof (MYREAL) * EARTH->numpop2); log_param0 (EARTH->atl[rep][locus].param0, EARTH->atl[rep][locus].lparam0, EARTH->numpop2); copies2lcopies (&EARTH->atl[rep][locus]); #endif // start parameter estimation for // single chain, single replicate EARTH->repkind = SINGLECHAIN; kind = SINGLELOCUS; *Gmax = (EARTH->G > *Gmax) ? EARTH->G + 1 : (*Gmax); #ifdef LONGSUM change_longsum_times (EARTH); #endif /*LONGSUM*/ if (!options->bayes_infer) { (void) estimateParameter (rep, *Gmax, EARTH, options, EARTH->cov[locus], chain, type, kind, EARTH->repkind); } if (EARTH->options->heating) { print_heating_progress (universe, EARTH->options, increment * steps * treeupdateratio); reset_heated_accept(universe,EARTH->options->heated_chains); } else { print_progress (EARTH->options, EARTH, rep, increment * steps * treeupdateratio, (long) EARTH->accept_freq); EARTH->accept_freq = 0. ; EARTH->accept = 0L ; } // cleanup and prepare next prepare_next_chain (universe, EARTH->options, type, chain, &chains, &pluschain, locus, replicate); } //end chains } /// set penalizing distance for jumps in the maximizer on or off void set_penalizer (long chain, long chains, char type, world_fmt ** universe) { if (type == 'l') { if (chain >= chains - 1) { EARTH->in_last_chain = TRUE; unset_penalizer_function (TRUE); } } else { EARTH->in_last_chain = FALSE; unset_penalizer_function (FALSE); } } /// \brief run a replicate /// /// run a replicate /// \callgraph void run_replicate (long locus, long replicate, world_fmt ** universe, option_fmt * options, data_fmt * data, tpool_t * localheating_pool, int usize, long *treefilepos, long *Gmax) { // long i,j; long chains = 0; char type = 's'; long pluschain; long runs; long increment = 1; long steps; long oldsteps = 0; /* loop over independent replicates----------------------- */ /* but make sure that we get single chain estimates, too */ EARTH->locus = locus; EARTH->repkind = SINGLECHAIN; EARTH->replicate = replicate; if (setup_locus (locus, EARTH, options, data) == 0) return; if (options->heating) heating_prepare (universe, usize, options, data, replicate); type = 's'; runs = 1; // rep = 0; pluschain = options->pluschain; //see obscure options /* short and long chains ----------------------------- */ set_bounds (&increment, &oldsteps, &chains, options, type); steps = oldsteps; EARTH->increment = increment; while (runs-- >= 0) { if (myID == MASTER && type == 's') { print_menu_chain (type, FIRSTCHAIN, oldsteps, EARTH, options, replicate); if (EARTH->options->treeprint == ALL) print_tree (EARTH, 0, treefilepos); } EARTH->chains = chains; /* loop over chains--------------------------- */ run_chains (universe, usize, options, localheating_pool, replicate, chains, type, increment, oldsteps, treefilepos, Gmax); //PB 020203 change_chaintype (locus, &type, EARTH, &increment, &oldsteps, &chains, options); /* debug if(EARTH->cold) { for(j=0; j < EARTH->numpop2; j++) { for(i=0;i < (EARTH->mighistloci[EARTH->locus].eventbinnum[j]); i++) { fprintf(stdout,"(%f,%f) ",EARTH->mighistloci[EARTH->locus].eventbins[j][i][0], EARTH->mighistloci[EARTH->locus].eventbins[j][i][1]); } fprintf(stdout,"\n%i>######## %li ######\n",myID, EARTH->mighistloci[EARTH->locus].eventbinnum[j]); } } */ /* evaluate multiple long chain estimates */ if (runs < 0 && options->replicate && options->replicatenum == 0) { EARTH->repkind = MULTIPLECHAIN; #ifdef LONGSUM change_longsum_times (EARTH); #endif /*LONGSUM*/ if (!options->bayes_infer) { (void) estimateParameter (replicate, *Gmax, EARTH, options, EARTH->cov[locus], chains, type, SINGLELOCUS, EARTH->repkind); } } } //end runs if(EARTH->cold && EARTH->options->bayes_infer) { collect_eff_values(EARTH); } } void print_burnin_stop(FILE *file, world_fmt **universe, option_fmt * options) { world_fmt *world = universe[0]; long z; long maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; fprintf(file,"\n\n\nStop of burn-in phase due to convergence\n"); fprintf(file,"----------------------------------------\n\n"); fprintf(file,"Locus Replicate Steps Variance ratio (new/old variance)\n"); fprintf(file,"----- --------- ------ ---------------------------------\n"); for(z=0; z < world->loci * maxreplicate; z++) { fprintf(file,"%5li %5li %10li %10.4f (%f/%f)\n",world->burnin_stops[z].locus, world->burnin_stops[z].replicate, world->burnin_stops[z].stopstep, world->burnin_stops[z].variance/world->burnin_stops[z].oldvariance, world->burnin_stops[z].variance, world->burnin_stops[z].oldvariance); // world->burnin_stops[z].worker = myID; } fprintf(file,"\n\n"); #ifdef PRETTY if(file==EARTH->outfile) pdf_burnin_stops(EARTH, maxreplicate); #endif } void print_bayesfactor(FILE *file, world_fmt **universe, option_fmt * options) { long t; long maxreplicate = (options->replicate && options->replicatenum > 0) ? options->replicatenum : 1; MYREAL sum = 0.; MYREAL heat0 = 1.; MYREAL heat1 = 1.; if(options->heating) { for(t=1; t < EARTH->options->heated_chains; t++) { #ifdef MPI heat0 = 1./options->heat[t-1] ; heat1 = 1./options->heat[t]; #else if(options->adaptiveheat) { heat0 = universe[t-1]->averageheat ; heat1 = universe[t]->averageheat; } else { heat0 = universe[t-1]->heat ; heat1 = universe[t]->heat; } #endif sum += (heat0 - heat1) * ((log(EARTH->bf[t-1]) + log(EARTH->bf[t]))/(2. * EARTH->options->lsteps*EARTH->loci*maxreplicate) + EARTH->bfscale); // printf("%i> sum[%li]=%f temp=(%f %f) values=(%f %f) scale=%f\n",myID,t,sum,heat0,heat1,log(EARTH->bf[t-1]),log(EARTH->bf[t]), EARTH->bfscale); } fprintf(file,"\n\n\nLog-Probability of the data given the model (marginal likelihood)\n"); fprintf(file,"--------------------------------------------------------------------\n[Use this value for Bayes factor calculations:\nBF = Exp[log(P(D|thisModel) - log(P(D|otherModel)]\nshows the support for thisModel]\n\n"); fprintf(file,"(1) Thermodynamic integration: log(Prob(D|Model))=%f %s\n", sum, options->adaptiveheat ? "(adaptive heating -> value may be wrong)" : ""); fprintf(file,"(2) Harmonic mean: log(Prob(D|Model))=%f\n", EARTH->bfscale - log (EARTH->hm / (EARTH->options->lsteps*EARTH->loci*maxreplicate))); fprintf(file,"(3) Arithmetic mean: log(Prob(D|Model))=%f\n", EARTH->bfscale + log (EARTH->am / (EARTH->options->lsteps*EARTH->loci*maxreplicate))); fprintf(file,"(1) and (2) should give a similar result, (2) is considered more\ncrude than (1), but (1) needs heating with several well-spaced chains\n(3) maybe strongly biased by large values\n\n\n"); #ifdef PRETTY if(file==EARTH->outfile) pdf_bayes_factor(EARTH,sum, maxreplicate); #endif } else { fprintf(file,"\n\n\nLog-Probability of the data given the model (marginal likelihood)\n"); fprintf(file,"--------------------------------------------------------------------\n[Use this value for Bayes factor calculations:\nBF = Exp[log(P(D|thisModel) - log(P(D|otherModel)]\nshows the support for thisModel]\n\n"); fprintf(file,"(1) Thermodynamic integration: log(Prob(D|Model))= UNAVAILABLE because no heating was used\n"); fprintf(file,"(2) Harmonic mean: log(Prob(D|Model))=%f\n", EARTH->bfscale - log (EARTH->hm / (EARTH->options->lsteps*EARTH->loci*maxreplicate))); fprintf(file,"(3) Arithmetic mean: log(Prob(D|Model))=%f\n", EARTH->bfscale + log (EARTH->am / (EARTH->options->lsteps*EARTH->loci*maxreplicate))); fprintf(file,"(2) is considered a crude estimate; (3) maybe strongly biased by large values\n\n\n"); #ifdef PRETTY if(file==EARTH->outfile) pdf_bayes_factor(EARTH,sum, maxreplicate); #endif } } /// calculate bayes factor using thermodynamic integration /// based on a method by Friel and Pettitt 2005 /// (http://www.stats.gla.ac.uk/research/TechRep2005/05.10.pdf) /// this is the same method described in Lartillot and Phillippe 2006 Syst Bio /// integrate over all temperature using a simple trapezoidal rule /// prob(D|model) is only accurate with intervals for temperatures from 1/0 to 1/1. /// reports also the harmonic mean and the arithmetic mean void calculate_BF(world_fmt **universe, option_fmt *options) { long i; MYREAL xx; // printf("@%i> \t ",myID); if(options->heating) { for (i = 0; i < options->heated_chains; i++) { if(universe[i]->param_like < universe[i]->bfscale) { EARTH->bf[i] += exp(universe[i]->param_like - universe[i]->bfscale); } else { EARTH->bf[i] *= exp(universe[i]->bfscale - universe[i]->param_like); universe[i]->bfscale = universe[i]->param_like; EARTH->bf[i] += exp(universe[i]->param_like - universe[i]->bfscale); } } } else { EARTH->bfscale = EARTH->param_like; } xx = EXP(EARTH->param_like - EARTH->bfscale); if(xx>SMALLEPSILON) { EARTH->hm += 1. / xx; EARTH->am += xx; } // printf("%i>\tbf[0]=%f hm=%f am=%f scale=%f plike=%f oldval=%f\n",myID, EARTH->bf[0],EARTH->hm, EARTH->am, EARTH->bfscale, EARTH->param_like,EARTH->bayes->oldval); } #ifdef LONGSUM /// put timepoints into the total tree to allow for bottlenecks and growth long find_chaincutoffs (MYREAL * treetime, world_fmt * world, long locus, long r) { long j; long T; MYREAL totaltime = 0.; long copies = 0; tarchive_fmt *atl; atl = world->atl[r][locus].tl; for (j = 0; j < world->atl[r][locus].T; j++) { T = atl[j].longsumlen - 1; copies += atl[j].copies; totaltime = atl[j].longsum[T].eventtime / 3.; treetime[0] += totaltime; treetime[1] += totaltime + totaltime; treetime[2] += totaltime + totaltime + totaltime; } return copies; } /// find the cutoffs for the locus, averaging over multiple replicates void find_loci_cutoffs (MYREAL * treetime, world_fmt * world) { long r; long locus; long count = 0; long repstart = 0; long repstop = 1; memset (treetime, 0, sizeof (MYREAL) * 3); set_replicates (world, world->repkind, world->rep, &repstart, &repstop); for (locus = 0; locus < world->loci; locus++) { for (r = repstart; r < repstop; r++) { count += find_chaincutoffs (treetime, world, locus, r); } } treetime[0] /= (MYREAL) count; treetime[1] /= (MYREAL) count; treetime[2] /= (MYREAL) count; } /// find the cutoffs avaergin over all replicates void find_replicate_cutoffs (MYREAL * treetime, world_fmt * world) { long r; long count = 0; long repstart = 0; long repstop = 1; memset (treetime, 0, sizeof (MYREAL) * 3); set_replicates (world, world->repkind, world->rep, &repstart, &repstop); for (r = repstart; r < repstop; r++) { count += find_chaincutoffs (treetime, world, world->locus, r); } treetime[0] /= (MYREAL) count; treetime[1] /= (MYREAL) count; treetime[2] /= (MYREAL) count; } /// change the cutoff times void change_longsum_times (world_fmt * world) { long i; long numpop3 = 3 * world->numpop; long numpop6 = 2 * numpop3; MYREAL *treetime; treetime = (MYREAL *) mycalloc (3, sizeof (MYREAL)); // we have 3 classes if (world->locus < world->loci) find_replicate_cutoffs (treetime, world); else find_loci_cutoffs (treetime, world); for (i = numpop3; i < numpop6; i += 3) { world->flucrates[i] = treetime[0]; world->flucrates[i + 1] = treetime[1]; world->flucrates[i + 2] = treetime[2]; } myfree(treetime); } #endif /*LONGSUM*/