#include #include #include #include "nab.h" #include "memutil.h" #ifdef SCALAPACK #include "mpi.h" #endif /* Here are offsets into the ScaLAPACK descriptor array. */ #define DTYPE_ (0) #define CTXT_ (1) #define M_ (2) #define N_ (3) #define MB_ (4) #define NB_ (5) #define RSRC_ (6) #define CSRC_ (7) #define LLD_ (8) #define DLEN_ (9) extern REAL_T tdiag, tmmetwo; REAL_T seconds( void ); INT_T get_blocksize( void ); INT_T get_mytaskid( void ); INT_T get_numtasks( void ); INT_T dspev_(char *, char *, int *, REAL_T *, REAL_T *, REAL_T *, int *, REAL_T *, int *); INT_T dsyev_(char *, char *, INT_T *, REAL_T *, INT_T *, REAL_T *, REAL_T *, INT_T *, INT_T *); void blacs_pinfo_(INT_T *, INT_T *); void sl_init_(INT_T *, INT_T *, INT_T *); INT_T sl_gridreshape_(INT_T *, INT_T *, INT_T *, INT_T *, INT_T *, INT_T *); void blacs_gridinfo_(INT_T *, INT_T *, INT_T *, INT_T *, INT_T *); void blacs_barrier_(INT_T *, char *); void blacs_gridexit_(INT_T *); INT_T numroc_(INT_T *, INT_T *, INT_T *, INT_T *, INT_T *); void descinit_(INT_T *, INT_T *, INT_T *, INT_T *, INT_T *, INT_T *, INT_T *, INT_T *, INT_T *, INT_T *); void pdsyev_(char *, char *, INT_T *, REAL_T *, INT_T *, INT_T *, INT_T *, REAL_T *, REAL_T *, INT_T *, INT_T *, INT_T *, REAL_T *, INT_T *, INT_T *); void pdgemr2d_(INT_T *, INT_T *, REAL_T *, INT_T *, INT_T *, INT_T *, REAL_T *, INT_T *, INT_T *, INT_T *, INT_T *); INT_T myroc(int, int, int, int); REAL_T *ptr2d(REAL_T *, int *, int, int); FILE *safe_fopen( char *, char * ); static int c__3 = 3; /*********************************************************************** SYMNUM() ************************************************************************/ /* ----- routine to give the symmetry number. only for linear */ /* molecules. for others symmetry number is unity ----- */ static int symnum(int natoms, REAL_T * amass, REAL_T * sn, int *linear) { /* Parameter adjustments */ --amass; /* Function Body */ *sn = 1.; if (natoms <= 2) { *linear = 1; if (amass[1] == amass[2]) { *sn = 2.; } } return 0; } /* symnum */ /* ----------------------------------------------------------------------- */ static int mofi(int natoms, REAL_T * c, REAL_T * amass, REAL_T * pmom) { /* Initialized data */ static char uplo[1] = "U"; static char jobz[1] = "V"; static REAL_T zero = 0.; /* System generated locals */ /* Local variables */ static REAL_T t[9], x, y, z, e2[30], wt; static int i, iat; static REAL_T com[3]; static int ier, iaind; static REAL_T totwt, eigvec[9]; /* compute the principal moments of inertia. */ /* units are amu-bohr**2 */ /* Parameter adjustments */ --pmom; --amass; --c; /* Function Body */ /* compute the position of the center of mass and translate */ /* it to the origin. */ com[0] = zero; com[1] = zero; com[2] = zero; totwt = zero; for (iat = 1; iat <= natoms; ++iat) { iaind = (iat - 1) * 3; wt = amass[iat]; totwt += wt; com[0] += wt * c[iaind + 1]; com[1] += wt * c[iaind + 2]; com[2] += wt * c[iaind + 3]; } com[0] /= totwt; com[1] /= totwt; com[2] /= totwt; /* compute the principal moments. */ for (i = 1; i <= 9; ++i) { t[i - 1] = zero; } for (iat = 1; iat <= natoms; ++iat) { wt = amass[iat]; x = c[1 + 3 * (iat - 1)] - com[1 - 1]; y = c[2 + 3 * (iat - 1)] - com[2 - 1]; z = c[3 + 3 * (iat - 1)] - com[3 - 1]; t[0] += wt * (y * y + z * z); t[2] += wt * (x * x + z * z); t[5] += wt * (x * x + y * y); t[1] -= wt * x * y; t[3] -= wt * x * z; t[4] -= wt * y * z; } ier = 0; dspev_(jobz, uplo, &c__3, t, &pmom[1], eigvec, &c__3, e2, &ier); return 0; } /* mofi */ /*********************************************************************** THERMO() ************************************************************************/ static int thermo(int natoms, int n, int ilevel, REAL_T *c, REAL_T *amass, REAL_T *freq, REAL_T t, REAL_T patm, REAL_T energy ) { REAL_T *vtemp=NULL, *evibn=NULL, *cvibn=NULL, *svibn=NULL; /* Initialized data */ static REAL_T zero = 0.; static REAL_T akilo = 1e3; static REAL_T pstd = 101325.; static REAL_T pt2 = .2; static REAL_T half = .5; static REAL_T one = 1.; static REAL_T onept5 = 1.5; static REAL_T two = 2.; static REAL_T twopt5 = 2.5; static REAL_T four = 4.; static REAL_T eight = 8.; /* System generated locals */ REAL_T d__1; /* Local variables */ REAL_T e; int i; REAL_T p, s, pi, cv, sn, rt, em1; int iff; REAL_T arg, gas; int iat; REAL_T con, dum, dum1, dum2, argd, cvib, evib; int ndof; REAL_T avog, ezkc, pipi, ezpe, tokg, svib, crot, pmom[10], erot, etot, ctot, srot, stot, tovt, jpcal, tocal, ccont, ctran, econt, etran, scont, stran, tomet, rtemp, boltz, etovt, rtemp1, rtemp2, rtemp3, planck; int linear; REAL_T tokcal, hartre, weight; int nimag; int mytaskid; /* Allocate temporary vectors. */ vtemp = vector( 1, n ); evibn = vector( 1, n ); cvibn = vector( 1, n ); svibn = vector( 1, n ); /* Get the MPI task id. */ mytaskid = get_mytaskid(); /* given the structure of a molecule and its normal mode vibrational */ /* frequencies this routine uses standard statistical mechanical */ /* formulas for an ideal gas (in the canonical ensemble, see, */ /* for example, d. a. mcquarrie, "statistical thermodynamics", */ /* harper & row, new york, 1973, chapters 5, 6, and 8) to compute */ /* the entropy, heat capacity, and internal energy. */ /* the si system of units is used internally. conversion to units */ /* more familiar to most chemists is made for output. */ /* amass: atomic weights, in amu. */ /* pmom: principal moments of inertia, in amu-bohr**2 and */ /* in ascending order. */ /* freq: vibrational frequencies, in cm**-1 and in ascending */ /* order */ /* c : coordinates in Angstroms */ /* vtemp: vibrational temperatures, in kelvin. */ /* evibn: contribution to e from the vibration n. */ /* cvibn: contribution to cv from the vibration n. */ /* svibn: contribution to s from the vibration n. */ /* t: temperature */ /* patm: pressure, in atmospheres */ /* Parameter adjustments */ --freq; --amass; --c; /* tokg: kilograms per amu. */ /* boltz: boltzman constant, in joules per kelvin. */ /* planck: planck constant, in joule-seconds. */ /* avog: avogadro constant, in mol**(-1). */ /* jpcal: joules per calorie. */ /* tomet: metres per Angstrom. */ /* hartre: joules per hartree. */ tokg = 1.660531e-27; boltz = 1.380622e-23; planck = 6.626196e-34; avog = 6.022169e23; jpcal = 4.18674; tomet = 1e-10; hartre = 4.35981e-18; /* compute the gas constant, pi, pi**2, and e. */ /* compute the conversion factors cal per joule and kcal per joule. */ gas = avog * boltz; pi = four * atan(one); pipi = pi * pi; e = exp(one); tocal = one / jpcal; tokcal = tocal / akilo; /* print the temperature and pressure. */ p = pstd * patm; if (mytaskid == 0) { fprintf(stdout, "\n - Thermochemistry -\n\n"); fprintf(stdout, "Temperature: %8.3f\n Pressure: %8.3f\n", t, patm); } rt = gas * t; /* compute and print the molecular mass in amu, then convert to */ /* kilograms. */ weight = zero; for (iat = 1; iat <= natoms; ++iat) { weight += amass[iat]; } if (mytaskid == 0) { fprintf(stdout, " Mass: %8.3f\n", weight); } weight *= tokg; /* compute contributions due to translation: */ /* etran-- internal energy */ /* ctran-- constant v heat capacity */ /* stran-- entropy */ dum1 = boltz * t; d__1 = two * pi; dum2 = pow(d__1, onept5); arg = pow(dum1, onept5) / planck; arg = arg / p * (dum1 / planck); arg = arg * dum2 * (weight / planck); arg = arg * sqrt(weight) * pow(e, twopt5); stran = gas * log(arg); etran = onept5 * rt; ctran = onept5 * gas; /* Compute contributions due to electronic motion: */ /* It is assumed that the first electronic excitation energy */ /* is much greater than kt and that the ground state has a */ /* degeneracy of one. Under these conditions the electronic */ /* partition function can be considered to be unity. The */ /* ground electronic state is taken to be the zero of */ /* electronic energy. */ /* for monatomics print and return. */ if (natoms <= 1) { s = stran * tocal; e = etran * tokcal; cv = ctran * tocal; /* need print statement here */ free_vector( vtemp, 1, n ); free_vector( evibn, 1, n ); free_vector( cvibn, 1, n ); free_vector( svibn, 1, n ); return 0; } /* compute contributions due to rotation. */ /* Compute the principal moments of inertia, get the rotational */ /* symmetry number, see if the molecule is linear, and compute */ /* the rotational temperatures. Note the imbedded conversion */ /* of the moments to SI units. */ mofi(natoms, &c[1], &amass[1], pmom); if (mytaskid == 0) { fprintf(stdout, "Principal moments of inertia in amu-A**2:\n"); fprintf(stdout, " %12.2f%12.2f%12.2f\n", pmom[0], pmom[1], pmom[2]); } linear = 0; symnum(natoms, &amass[1], &sn, &linear); if (mytaskid == 0) { fprintf(stdout, "Rotational symmetry number is %2.0f\n", sn); } con = planck / (boltz * eight * pipi); con = con / tokg * (planck / (tomet * tomet)); if (linear) { rtemp = con / pmom[2]; if (mytaskid == 0) { if (rtemp < pt2) fprintf(stdout, "Assuming classical behavior for rotation\n"); fprintf(stdout, "Rotational temperature: %8.3f\n", rtemp); } } else { rtemp1 = con / pmom[0]; rtemp2 = con / pmom[1]; rtemp3 = con / pmom[2]; if (mytaskid == 0) { if (rtemp1 < pt2) { fprintf(stdout, "Assuming classical behavior for rotation\n"); } fprintf(stdout, "Rotational temperatures: %8.3f %8.3f %8.3f\n", rtemp1, rtemp2, rtemp3); } } /* erot-- rotational contribution to internal energy. */ /* crot-- rotational contribution to cv. */ /* srot-- rotational contribution to entropy. */ if (linear) { erot = rt; crot = gas; arg = t / rtemp * (e / sn); srot = gas * log(arg); } else { erot = onept5 * rt; crot = onept5 * gas; arg = sqrt(pi * e * e * e) / sn; dum = t / rtemp1 * (t / rtemp2) * (t / rtemp3); arg *= sqrt(dum); srot = gas * log(arg); } /* compute contributions due to vibration. */ /* compute vibrational temperatures and zero point vibrational */ /* energy. only real frequencies are included in the analysis. */ nimag = 0; for( i=1; i<=3*natoms; i++ ) if( freq[i] < -0.5 ) nimag++; if (mytaskid == 0) { if( nimag > 0 ) fprintf( stdout, "found %d imaginary frequencies\n", nimag); } /* (---iff is the first frequency to include in thermo:) */ if (ilevel != 0) { iff = 0 + nimag; } else if (linear) { iff = 5 + nimag; } else { iff = 6 + nimag; } ndof = 3 * natoms - iff; con = planck / boltz; ezpe = zero; for (i = 1; i <= ndof; ++i) { vtemp[i] = freq[i + iff] * con * 3e10; ezpe += freq[i + iff] * 3e10; } ezpe = half * planck * ezpe; ezkc = ezpe * tokcal * avog; if (mytaskid == 0) { fprintf(stdout, "Zero-point vibrational energy: %10.3f\n", ezkc); } /* compute: */ /* evib-- the vibrational component of the internal energy. */ /* cvib-- the vibrational component of the heat capacity. */ /* svib-- the vibrational component of the entropy. */ evib = zero; cvib = zero; svib = zero; for (i = 1; i <= ndof; ++i) { /* compute some common factors. */ tovt = vtemp[i] / t; etovt = exp(tovt); em1 = etovt - one; /* compute contributions due to the i'th vibration. */ econt = tovt * (half + one / em1); d__1 = tovt / em1; ccont = etovt * (d__1 * d__1); argd = one - one / etovt; if (argd > 1e-7) { scont = tovt / em1 - log(argd); } else { scont = 0.f; if (mytaskid == 0) { fprintf(stdout, "Warning: setting vibrational entropy to zero for mode %d with vtemp = %8.3f\n", i, vtemp[i]); } } evibn[i] = econt * rt; cvibn[i] = ccont * gas; svibn[i] = scont * gas; evib += econt; cvib += ccont; svib += scont; } evib *= rt; cvib *= gas; svib *= gas; /* the units are now: */ /* e-- joules/mol */ /* c-- joules/mol-kelvin */ /* s-- joules/mol-kelvin */ etot = etran + erot + evib; ctot = ctran + crot + cvib; stot = stran + srot + svib; /* convert to the following and print */ /* e-- kcal/mol */ /* c-- cal/mol-kelvin */ /* s-- cal/mol-kelvin */ etran *= tokcal; ctran *= tocal; stran *= tocal; erot *= tokcal; crot *= tocal; srot *= tocal; evib *= tokcal; cvib *= tocal; svib *= tocal; etot = etran + erot + evib + energy; ctot = ctran + crot + cvib; stot = stran + srot + svib; for (i = 1; i <= ndof; ++i) { evibn[i] *= tokcal; cvibn[i] *= tocal; svibn[i] *= tocal; } if (mytaskid == 0) { fprintf(stdout, "\n freq. E Cv S\n"); fprintf(stdout, " cm**-1 kcal/mol cal/mol-K cal/mol-K\n"); fprintf(stdout, "Total: %11.3f %11.3f %11.3f\n", etot, ctot, stot); fprintf(stdout, "translational: %11.3f %11.3f %11.3f\n", etran, ctran, stran); fprintf(stdout, "rotational: %11.3f %11.3f %11.3f\n", erot, crot, srot); fprintf(stdout, "vibrational: %11.3f %11.3f %11.3f\n", evib, cvib, svib); fprintf(stdout, "ff energy: %11.3f\n", energy ); for (i = 1; i <= iff; ++i) { fprintf(stdout, " %5d %10.3f\n", i, freq[i]); } for (i = 1; i <= ndof; ++i) { fprintf(stdout, " %5d %10.3f %11.3f %11.3f %11.3f\n", i + iff, freq[i + iff], evibn[i], cvibn[i], svibn[i]); } } free_vector( vtemp, 1, n ); free_vector( evibn, 1, n ); free_vector( cvibn, 1, n ); free_vector( svibn, 1, n ); return 0; } /* thermo */ /*********************************************************************** NMODE() ************************************************************************/ /* * Compute normal modes * * Calling parameters are as follows: * * x[n] contains the coordinates * n dimension of variables * func pointer to function that computes the energy, gradient and hessian * (probably only works now with mme2()) * eigp number of modes to post to file "vecs" * */ int nmode( REAL_T *x, int *n, REAL_T ( *func)( REAL_T*, REAL_T*, REAL_T*, REAL_T*, REAL_T*, int*, int*, int*, int*, int*, int*, int*, int*, int* ), int *eigp ) { REAL_T *g=NULL, *m=NULL, *v=NULL, *h=NULL, *work=NULL, *grad=NULL; REAL_T sumg, energy, t1, t2; int niter, i, j, k, mcopy, natom, ret_val, lwork, info; int mytaskid, numtasks, gridim; int context_PxQ=-1, context_1x1=-1, context_Nx1=-1; int descH_PxQ[DLEN_], descG_PxQ[DLEN_], descG_1x1[DLEN_]; FILE *vfp; REAL_T pressure, temperature; char uplo, jobz; size_t ncopy; #ifdef SCALAPACK int zero=0, one=1; int myrow, mycol, nprow, npcol; int myrowC, mycolC, nprowC, npcolC; int bs3, ierror, lld; int np, nq, nb, sizesytrd, nprocs, contextC, nrc, ldc, qrmem; int lldH_PxQ, lldG_PxQ, lldZ_PxQ, lldG_1x1; size_t locpH_PxQ, locpG_PxQ, locpG_1x1; size_t locqH_PxQ, locqG_PxQ, locqG_1x1; size_t sizeH_PxQ, sizeG_PxQ, sizeG_1x1; size_t adr, sizemqrleft; REAL_T *ptr, *reductarr=NULL, *eigvecrow=NULL; #endif /* If PRINT_NM_TIMES is defined print some calculation times. */ #undef PRINT_NM_TIMES /* Get mytaskid and, if SCALAPACK is defined, numtasks. */ mytaskid = get_mytaskid(); #ifdef SCALAPACK numtasks = get_numtasks(); #endif /* Allocate some dynamic vectors and matrices. */ mcopy = *n; ncopy = *n; m = vector( 1, ncopy ); v = vector( 1, ncopy ); #ifndef SCALAPACK /* If SCALAPACK is not defined, allocate full copies of g and h. */ g = vector( 1, ncopy ); h = vector( 0, ncopy*ncopy ); #else /* * If SCALAPACK is defined, allocate distributed copies of g and h, * as well as a single copy of grad. * * Create a general context. Although context_PxQ does comprise all * of the processes, it appears that the general context must be * distinct from the context(s) of the matrices that participate * in the redistribution via pdgemr2d. * * The topologic layout of the general context does not appear * to be important, so this general context is row cyclic. * * It appears to be important that the most general context be created * first, followed by successively less general contexts. For this * code the correct order of creation is Nx1 then PxQ then 1x1. Each * context is subsumed by the earlier created contexts. I don't know * what to do about overlapping contexts where one does not sumsume * the other. Failure to the most general context first leads to * synchronization errors. */ sl_init_(&context_Nx1, &numtasks, &one); /* Calculate the dimensions of the largest possible square process grid. */ gridim = (int)(sqrt((REAL_T)numtasks)+0.5); if (gridim*gridim > numtasks) { gridim -= 1; } if (gridim == 0) { gridim = 1; } /* * Initialize the process grid for block cyclic distribution of matrices. * Note that a returned context of -1 indicates that the task is not * active on the process grid. */ sl_init_(&context_PxQ, &gridim, &gridim); /* Initialize the process grid for a single (1x1) process. */ sl_init_(&context_1x1, &one, &one); /* * Get the number of rows and columns on the block cyclic (PxQ) process grid, * as well as this task's row and column on the grid. */ blacs_gridinfo_(&context_PxQ, &nprow, &npcol, &myrow, &mycol); /* * Get the blocksize for a square block. Because in egb2 the * loop index i selects three rows of the Hessian, multiply * the blocksize by three. */ bs3 = 3*get_blocksize(); /* * If this task is on the process grid, set up the array descriptors. * If this task isn't on the process grid, set descZ_PxQ[CTXT_], * descG_PxQ[CTXT_] and descH_PxQ[CTXT_] to -1. These values will * be used by pdgemr2d to determine activity on the grid. */ if (context_PxQ >= 0) { /* * Allocate then distribute the Hessian matrix h, the eigenvector * matrix z and the gradient vector g on the block cyclic process grid. * The gradient vector g does not need to be allocated for columns other * than column 0, but the descinit_ function must be called for all columns. * * The numroc_ function is used to calculate the number of matrix * elements that are distributed across a PxQ processor grid. */ locpG_PxQ = numroc_(&mcopy, &bs3, &myrow, &zero, &nprow); locqG_PxQ = numroc_(&one, &bs3, &mycol, &zero, &npcol); sizeG_PxQ = locpG_PxQ * locqG_PxQ; lldG_PxQ = locpG_PxQ; descinit_(descG_PxQ, &mcopy, &one, &bs3, &bs3, &zero, &zero, &context_PxQ, &lldG_PxQ, &info); g = vector( 1, sizeG_PxQ ); locpH_PxQ = numroc_(&mcopy, &bs3, &myrow, &zero, &nprow); locqH_PxQ = numroc_(&mcopy, &bs3, &mycol, &zero, &npcol); sizeH_PxQ = locpH_PxQ * locqH_PxQ; lldH_PxQ = locpH_PxQ; descinit_(descH_PxQ, &mcopy, &mcopy, &bs3, &bs3, &zero, &zero, &context_PxQ, &lldH_PxQ, &info); h = vector( 0, sizeH_PxQ ); } else { descG_PxQ[CTXT_] = -1; descH_PxQ[CTXT_] = -1; } /* * Get the number of rows and columns on the single process grid, * as well as this task's row and column on the grid. */ blacs_gridinfo_(&context_1x1, &nprow, &npcol, &myrow, &mycol); /* * If this task is on the process grid, set up the array descriptors. * If this task isn't on the process grid, set descG_1x1[CTXT_] ] to -1. * This value will be used by pdgemr2d to determine activity on the grid. */ if (context_1x1 >= 0) { /* * Allocate then distribute the gradient vector grad on the single * process grid. The descinit_ function is called for this array * for only the task that is active on the grid. * * Also, for the other tasks that are not active, the vector grad * is allocated because the copy of grad from the single process * grid is broadcast to the other copies. The size of this vector * for these other tasks is determined by the ncopy variable. * * The numroc_ function is used to calculate the number of matrix * elements that are distributed across a 1x1 processor grid. */ locpG_1x1 = numroc_(&mcopy, &bs3, &myrow, &zero, &nprow); locqG_1x1 = numroc_(&one, &bs3, &mycol, &zero, &npcol); sizeG_1x1 = locpG_1x1 * locqG_1x1; lldG_1x1 = locpG_1x1; descinit_(descG_1x1, &mcopy, &one, &bs3, &bs3, &zero, &zero, &context_1x1, &lldG_1x1, &info); grad = vector( 1, sizeG_1x1 ); } else { descG_1x1[CTXT_] = -1; grad = vector( 1, ncopy ); } /* Allocate the eigenvector row and reduction arrays. */ eigvecrow = vector( 0, ncopy ); reductarr = vector( 0, ncopy ); #endif /* ifndef SCALAPACK */ niter = 1; t1 = seconds(); /* * For non-ScaLAPACK execution, set some variables to values that * will select the proper sections of code below. */ #ifndef SCALAPACK gridim = 1; context_PxQ = 0; #endif /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ /* * Compute the function value in "f", * its gradient (with respect to x) in "g", and * its hessian (with respect to x) in "h". * The atomic masses are returned in "m". * The number of atoms is returned in "natom". * * The grad, descG_PxQ, descG_1x1, descH_PxQ, * gridim, context_PxQ, context_1x1 and context_Nx1 * calling parameters supply ScaLAPACK information, * or are dummy arguments for non-ScaLAPACK execution. * * The &g[1] and &m[1] calling parameters * map from 1..n indexing in this newton function * to 0..n-1 indexing in *func (which defaults to mme2). * This technique is not used for h because it must be * indexed from 1 to n. */ energy = ( *func )( x, &g[ 1 ], h, &m[ 1 ], &grad[ 1 ], descG_PxQ, descG_1x1, descH_PxQ, &context_PxQ, &context_1x1, &context_Nx1, &gridim, &natom, &niter ); /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ t2 = seconds(); #ifdef PRINT_NM_TIMES if (mytaskid == 0) { printf("\nmme2 time = %10.2f\n\n", t2 - t1); fflush(stdout); } #endif tmmetwo += t2 - t1; /* * Some ScaLAPACK functions appear to quit unexpectedly * for large matrices on a 1x1 process grid, e.g. the pdgemm_ * function in newton.c, so bypass the ScaLAPACK pdsyev_ and * use the LAPACK dsyev_ function instead. * * The correct test is (gridim == 1), not (nprow == 1 && npcol == 1) * since processes that aren't on the 1x1 grid have nprow == npcol == -1, * which would direct control to pdgemr2d_ (below) that would hang because * it would not be called from all processes, specifically not from * the process that is on the 1x1 grid and has nprow == npcol == 1. * * Non-ScaLAPACK execution will select this section of code because * gridim and context_PxQ were set to 1 and 0, respectively, above. */ if (gridim == 1) { if (context_PxQ >= 0) { sumg = 0.0; for (i = 1; i <= ncopy; i++) { sumg += g[i] * g[i]; } sumg = sqrt(sumg/((REAL_T)ncopy)); if (mytaskid == 0) { printf( "RMS gradient = %12.5e\n", sumg ); fflush( stdout ); } /* * Mass weight the Hessian: */ j = 1; for(i=1; i<=natom; i++) { g[j+2] = g[j+1] = g[j] = 1.0/sqrt(m[i]); j += 3; } k = 0; for(i=1; i<=ncopy; i++){ for(j=1; j<=ncopy; j++){ h[k] = g[i]*h[k]*g[j]; k++; } } /* * Allocate the work array, call dsyev_ and deallocate the work array. * This step requires that LAPACK and BLAS support be available in your * NAB installation. */ if( *eigp ) jobz = 'V'; else jobz = 'N'; uplo = 'U'; lwork = 3*ncopy; work = vector( 0, lwork ); k=0; t1 = seconds(); dsyev_( &jobz, &uplo, n, h, n, &v[1], work, &lwork, &info ); t2 = seconds(); #ifdef PRINT_NR_TIMES if (mytaskid == 0) { printf("dysev time = %10.2f\n\n", t2 - t1); fflush(stdout); } #endif tdiag += t2 - t1; free_vector( work, 0, lwork ); /* un-mass-weight the eigenvectors: */ k = 0; for( i=1; i<=ncopy; i++ ){ for( j=1; j<=ncopy; j++ ){ h[k++] *= g[j]; } } /* get the eigenvalues into cm**-1 units: */ for(i=1; i<=ncopy; i++){ v[i] = v[i]>=0.0 ? 108.587*sqrt(v[i]) : -108.587*sqrt(-v[i]); } /* Make an Amber-compatible formatted vecs file but only via task 0. */ if( *eigp && mytaskid == 0){ vfp = safe_fopen( "vecs", "w" ); fprintf( vfp, " modes from NAB\n" ); fprintf( vfp, "%5d\n", ncopy ); for( i=0; i= 0) { sumg = 0.0; for (i = 1; i <= ncopy; i++) { sumg += grad[i] * grad[i]; } sumg = sqrt(sumg/((REAL_T)ncopy)); printf( "RMS gradient = %12.5e\n", sumg ); fflush( stdout ); } /* * Mass weight the Hessian: * * Get the processor row and column in context_PxQ, and modify the * diagonal elements of the Hessian. If the task is not active * in context_PxQ, the myroc function returns 0. */ blacs_gridinfo_(&context_PxQ, &nprow, &npcol, &myrow, &mycol); if (context_PxQ >= 0) { j = 1; for(i=1; i<=natom; i++) { grad[j+2] = grad[j+1] = grad[j] = 1.0/sqrt(m[i]); j += 3; } for (i=1; i<=ncopy; i++) { for (j=1; j<=ncopy; j++) { ptr = ptr2d(h, descH_PxQ, i-1, j-1); if (ptr != NULL) { *ptr = grad[i] * (*ptr) * grad[j]; } } } /* * Call the ScaLAPACK routines; requires that ScaLAPACK and BLAS support * be available in your NAB installation. * * Calculate minimum size for the work array with or without calculation * of the eigenvectors. See the pdsyev_ source code for details on how * to calculate LWMIN. */ if ( *eigp ) { jobz = 'V'; nprocs = nprow * npcol; contextC = sl_gridreshape_(&context_PxQ, &zero, &one, &one, &nprocs, &one); blacs_gridinfo_(&contextC, &nprowC, &npcolC, &myrowC, &mycolC); blacs_gridexit_(&contextC); nb = descH_PxQ[NB_]; nrc = numroc_(&mcopy, &nb, &myrowC, &zero, &nprocs); ldc = (nrc > 1) ? nrc : 1; np = numroc_(&mcopy, &nb, &myrow, &zero, &nprow); nq = numroc_(&mcopy, &nb, &mycol, &zero, &npcol); sizemqrleft = nb*nb + ( (nb*(nb-1)/2 > (np+nq)*nb) ? nb*(nb-1)/2 : (np+nq)*nb ); qrmem = 2*ncopy-2; lwork = 5*ncopy + ncopy*ldc + 1 + ( (sizemqrleft > qrmem) ? sizemqrleft : qrmem ); } else { jobz = 'N'; nb = descH_PxQ[NB_]; np = numroc_(&mcopy, &nb, &myrow, &zero, &nprow); sizesytrd = ( 3*nb > nb*(np+1) ) ? 3*nb : nb*(np+1); lwork = 5*ncopy + sizesytrd + 1; } /* * Allocate the work array, call pdsyev_ and deallocate the work array. * This step requires that ScaLAPACK and BLACS support be available in * your NAB installation. */ work = vector( 0, lwork ); t1 = seconds(); uplo = 'U'; pdsyev_( &jobz, &uplo, n, h, &one, &one, descH_PxQ, &v[1], h, &one, &one, descH_PxQ, work, &lwork, &info ); t2 = seconds(); #ifdef PRINT_NR_TIMES if (mytaskid == 0) { printf("pdsyev time = %10.2f\n\n", t2 - t1); fflush(stdout); } #endif tdiag += t2 - t1; free_vector( work, 0, lwork ); /* un-mass-weight the eigenvectors: */ for (i=1; i<=ncopy; i++) { for (j=1; j<=ncopy; j++) { ptr = ptr2d(h, descH_PxQ, i-1, j-1); if (ptr != NULL) { *ptr *= grad[j]; } } } /* get the eigenvalues into cm**-1 units: */ for(i=1; i<=ncopy; i++){ v[i] = v[i]>=0.0 ? 108.587*sqrt(v[i]) : -108.587*sqrt(-v[i]); } } /* * Make an Amber-compatible formatted vecs file. Write only by task 0. * Because the elements of the eigenvector matrix are distributed across * all of the processes of context_PxQ, it is necessary to gather them * for writing by task 0. This gather is accomplished by copying the * matrix elements one row at a time into the eigvecrow array in the following * manner. If a matrix element exists in the submatrix associated with * a particular process, that element is copied to the eigvecrow array, If * that element does not exist in the submatrix, a zero is placed in the * eigvecrow array. Once a row of the eigenvector matrix has been processed * in this manner, a reduction is performed involving all of the eigvecrow * arrays, including those that aren't active on the process grid so that * pdgemr2d_ doesn't hang. */ if ( *eigp ) { if (mytaskid == 0) { vfp = safe_fopen( "vecs", "w" ); fprintf( vfp, " modes from NAB\n" ); fprintf( vfp, "%5d\n", ncopy ); for( i=0; i= 0) { j = 0; pressure = 1.0; temperature = 298.15; thermo( natom, mcopy, j, x, &m[1], &v[1], temperature, pressure, energy ); } #endif /* ifdef SCALAPACK */ } #ifndef SCALAPACK /* Free the dynamic vectors and matrices. */ if (g != NULL) free_vector( g, 1, ncopy ); if (m != NULL) free_vector( m, 1, ncopy ); if (v != NULL) free_vector( v, 1, ncopy ); if (h != NULL) free_vector( h, 0, ncopy*ncopy ); #else /* Free the dynamic vectors and matrices. */ if (context_PxQ >= 0) { if (g != NULL) free_vector( g, 1, sizeG_PxQ ); if (h != NULL) free_vector( h, 0, sizeH_PxQ ); } if (context_1x1 >= 0) { if (grad != NULL) free_vector( grad, 1, sizeG_1x1 ); } else { if (grad != NULL) free_vector( grad, 1, ncopy ); } if (eigvecrow != NULL) free_vector( eigvecrow, 0, ncopy ); if (reductarr != NULL) free_vector( reductarr, 0, ncopy ); if (m != NULL) free_vector( m, 1, ncopy ); if (v != NULL) free_vector( v, 1, ncopy ); /* Exit the process grid only for tasks that are on the process grid. */ if (context_PxQ >= 0) { blacs_gridexit_(&context_PxQ); } if (context_1x1 >= 0) { blacs_gridexit_(&context_1x1); } if (context_Nx1 >= 0) { blacs_gridexit_(&context_Nx1); } #endif /* ifndef SCALAPACK */ /* Call egb2 with an iteration count of -3 to free static vectors. */ niter = -3; ( *func )( x, &g[ 1 ], h, &m[ 1 ], &grad[ 1 ], descG_PxQ, descG_1x1, descH_PxQ, &context_PxQ, &context_1x1, &context_Nx1, &gridim, &natom, &niter ); ret_val = 0; return ret_val; }