/* * eff.c: implement energy subroutines for 3 or 4 cartesian coordinates. * * Parallelization via OpenMP, creation of pair lists using a kd tree, and * optional calculation in 4D were added by Russ Brown (russ.brown@sun.com) */ #if defined(MPI) || defined(SCALAPACK) #include "mpi.h" #endif /* Here is the structure for a kd tree node. */ typedef struct kdnode { INT_T n; struct kdnode *lo, *hi; } KDNODE_T; /*********************************************************************** ECONS() ************************************************************************/ /* Calculate the constrained energy and first derivatives. */ static REAL_T econs( REAL_T *x, REAL_T *f ) { int i, foff, threadnum, numthreads; REAL_T e_cons, rx, ry, rz, rw; e_cons = 0.0; #pragma omp parallel reduction (+: e_cons) private (i, rx, ry, rz, rw) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute an offset into the gradient array for this thread, * but only if OPENMP is defined. */ #ifdef OPENMP foff = dim * prm->Natom * threadnum; #else foff = 0; #endif /* * Loop over all atoms. Map loop indices onto OpenMP threads * or MPI tasks using (static, 1) scheduling. */ for (i = threadnum; i < prm->Natom; i += numthreads) { if (constrained[i]) { rx = x[dim * i ] - x0[dim * i ]; ry = x[dim * i + 1] - x0[dim * i + 1]; rz = x[dim * i + 2] - x0[dim * i + 2]; e_cons += wcons * (rx * rx + ry * ry + rz * rz); f[foff + dim * i ] += 2. * wcons * rx; f[foff + dim * i + 1] += 2. * wcons * ry; f[foff + dim * i + 2] += 2. * wcons * rz; if (dim == 4) { rw = x[dim * i + 3] - x0[dim * i + 3]; e_cons += wcons * rw * rw; f[foff + dim * i + 3] += 2. * wcons * rw; } } } } return (e_cons); } /*********************************************************************** EBOND() ************************************************************************/ /* Calculate the bond stretching energy and first derivatives.*/ static REAL_T ebond( int nbond, int *a1, int *a2, int *atype, REAL_T *Rk, REAL_T *Req, REAL_T *x, REAL_T *f ) { int i, at1, at2, atyp, foff, threadnum, numthreads; REAL_T e_bond, r, rx, ry, rz, rw, r2, s, db, df, e; e_bond = 0.0; #pragma omp parallel reduction (+: e_bond) \ private (i, foff, at1, at2, atyp, threadnum, numthreads, \ rx, ry, rz, rw, r2, s, r, db, df, e ) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute an offset into the gradient array for this thread, * but only if OPENMP is defined. */ #ifdef OPENMP foff = dim * prm->Natom * threadnum; #else foff = 0; #endif /* * Loop over all 1-2 bonds. Map loop indices onto OpenMP threads * or MPI tasks using (static, 1) scheduling. */ for (i = threadnum; i < nbond; i += numthreads) { at1 = dim * a1[i] / 3; at2 = dim * a2[i] / 3; atyp = atype[i] - 1; rx = x[at1 ] - x[at2 ]; ry = x[at1 + 1] - x[at2 + 1]; rz = x[at1 + 2] - x[at2 + 2]; r2 = rx * rx + ry * ry + rz * rz; if (dim == 4) { rw = x[at1 + 3] - x[at2 + 3]; r2 += rw * rw; } s = sqrt(r2); r = 2.0 / s; db = s - Req[atyp]; df = Rk[atyp] * db; e = df * db; e_bond += e; df *= r; f[foff + at1 + 0] += rx * df; f[foff + at1 + 1] += ry * df; f[foff + at1 + 2] += rz * df; f[foff + at2 + 0] -= rx * df; f[foff + at2 + 1] -= ry * df; f[foff + at2 + 2] -= rz * df; if (dim == 4) { f[foff + at1 + 3] += rw * df; f[foff + at2 + 3] -= rw * df; } } } return (e_bond); } /*********************************************************************** EANGL() ************************************************************************/ /* Calculate the bond bending energy and first derivatives. */ static REAL_T eangl( int nang, int *a1, int *a2, int *a3, int *atype, REAL_T *Tk, REAL_T *Teq, REAL_T *x, REAL_T *f ) { int i, atyp, at1, at2, at3, foff, threadnum, numthreads; REAL_T dxi, dyi, dzi, dwi, dxj, dyj, dzj, dwj, ri2, rj2, ri, rj, rir, rjr; REAL_T dxir, dyir, dzir, dwir, dxjr, dyjr, dzjr, dwjr, cst, at, da, df, e, e_theta; REAL_T xtmp, dxtmp, ytmp, wtmp, dytmp, ztmp, dztmp, dwtmp; e_theta = 0.0; #pragma omp parallel reduction (+: e_theta) \ private (i, foff, at1, at2, at3, atyp, threadnum, numthreads, \ dxi, dyi, dzi, dwi, dxj, dyj, dzj, dwj, ri2, rj2, ri, rj, rir, rjr, \ dxir, dyir, dzir, dwir, dxjr, dyjr, dzjr, dwjr, cst, at, da, df, e, \ xtmp, dxtmp, ytmp, dytmp, ztmp, dztmp, wtmp, dwtmp) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute an offset into the gradient array for this thread, * but only if OPENMP is defined. */ #ifdef OPENMP foff = dim * prm->Natom * threadnum; #else foff = 0; #endif /* * Loop over all 1-3 bonds. Map loop indices onto OpenMP threads * or MPI tasks using (static, 1) scheduling. */ for (i = threadnum; i < nang; i += numthreads) { at1 = dim * a1[i] / 3; at2 = dim * a2[i] / 3; at3 = dim * a3[i] / 3; atyp = atype[i] - 1; dxi = x[at1 ] - x[at2 ]; dyi = x[at1 + 1] - x[at2 + 1]; dzi = x[at1 + 2] - x[at2 + 2]; dxj = x[at3 ] - x[at2 ]; dyj = x[at3 + 1] - x[at2 + 1]; dzj = x[at3 + 2] - x[at2 + 2]; ri2 = dxi * dxi + dyi * dyi + dzi * dzi; rj2 = dxj * dxj + dyj * dyj + dzj * dzj; if (dim == 4) { dwi = x[at1 + 3] - x[at2 + 3]; dwj = x[at3 + 3] - x[at2 + 3]; ri2 += dwi * dwi; rj2 += dwj * dwj; } ri = sqrt(ri2); rj = sqrt(rj2); rir = 1. / ri; rjr = 1. / rj; dxir = dxi * rir; dyir = dyi * rir; dzir = dzi * rir; dxjr = dxj * rjr; dyjr = dyj * rjr; dzjr = dzj * rjr; cst = dxir * dxjr + dyir * dyjr + dzir * dzjr; if (dim == 4) { dwir = dwi * rir; dwjr = dwj * rjr; cst += dwir * dwjr; } if (cst > 1.0) cst = 1.0; if (cst < -1.0) cst = -1.0; at = acos(cst); da = at - Teq[atyp]; df = da * Tk[atyp]; e = df * da; e_theta = e_theta + e; df = df + df; at = sin(at); if (at > 0 && at < 1.e-3) at = 1.e-3; else if (at < 0 && at > -1.e-3) at = -1.e-3; df = -df / at; xtmp = df * rir * (dxjr - cst * dxir); dxtmp = df * rjr * (dxir - cst * dxjr); ytmp = df * rir * (dyjr - cst * dyir); dytmp = df * rjr * (dyir - cst * dyjr); ztmp = df * rir * (dzjr - cst * dzir); dztmp = df * rjr * (dzir - cst * dzjr); f[foff + at1 + 0] += xtmp; f[foff + at3 + 0] += dxtmp; f[foff + at2 + 0] -= xtmp + dxtmp; f[foff + at1 + 1] += ytmp; f[foff + at3 + 1] += dytmp; f[foff + at2 + 1] -= ytmp + dytmp; f[foff + at1 + 2] += ztmp; f[foff + at3 + 2] += dztmp; f[foff + at2 + 2] -= ztmp + dztmp; if (dim == 4) { wtmp = df * rir * (dwjr - cst * dwir); dwtmp = df * rjr * (dwir - cst * dwjr); f[foff + at1 + 3] += wtmp; f[foff + at3 + 3] += dwtmp; f[foff + at2 + 3] -= wtmp + dwtmp; } } } return (e_theta); } /*********************************************************************** EPHI() ************************************************************************/ /* Calculate the dihedral torsion energy and first derivatives. */ static REAL_T ephi( int nphi, int *a1, int *a2, int *a3, int *a4, int *atype, REAL_T *Pk, REAL_T *Pn, REAL_T *Phase, REAL_T *x, REAL_T *f ) { REAL_T e, co, den, co1, uu, vv, uv, ax, bx, cx, ay, by, cy, az, bz, cz, aw, bw, cw; REAL_T a0x, b0x, c0x, a0y, b0y, c0y, a0z, b0z, c0z, a0w, b0w, c0w, a1x, b1x; REAL_T a1y, b1y, a1z, b1z, a1w, b1w, a2x, b2x, a2y, b2y, a2z, b2z, a2w, b2w; REAL_T dd1x, dd2x, dd3x, dd4x, dd1y, dd2y, dd3y, dd4y, dd1z, dd2z, dd3z, dd4z; REAL_T dd1w, dd2w, dd3w, dd4w; REAL_T df, aa, bb, cc, ab, bc, ac, cosq; REAL_T ktors, phase, e_tors; int i, at1, at2, at3, at4, atyp, foff, threadnum, numthreads; REAL_T ux, uy, uz, vx, vy, vz, delta, phi, dx1, dy1, dz1, yy, pi; pi = 3.1415927; e_tors = 0.0; #pragma omp parallel reduction (+: e_tors) \ private (i, at1, at2, at3, at4, atyp, ax, ay, az, aw, bx, by, bz, bw, \ cx, cy, cz, cw, ab, bc, ac, aa, bb, cc, uu, vv, uv, den, co, co1, \ a0x, a0y, a0z, a0w, b0x, b0y, b0z, b0w, c0x, c0y, c0z, c0w, \ a1x, a1y, a1z, a1w, b1x, b1y, b1z, b1w, a2x, a2y, a2z, a2w, \ b2x, b2y, b2z, b2w, dd1x, dd1y, dd1z, dd1w, dd2x, dd2y, dd2z, dd2w, \ dd3x, dd3y, dd3z, dd3w, dd4x, dd4y, dd4z, dd4w, phi, \ ux, uy, uz, vx, vy, vz, dx1, dy1, dz1, delta, df, e, yy, phase, \ ktors, cosq, threadnum, numthreads, foff) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute an offset into the gradient array for this thread, * but only if OPENMP is defined. */ #ifdef OPENMP foff = dim * prm->Natom * threadnum; #else foff = 0; #endif /* * Loop over all 1-4 bonds. Map loop indices onto OpenMP threads * or MPI tasks using (static, 1) scheduling. */ for (i = threadnum; i < nphi; i += numthreads) { at1 = dim * a1[i] / 3; at2 = dim * a2[i] / 3; at3 = dim * abs(a3[i]) / 3; at4 = dim * abs(a4[i]) / 3; atyp = atype[i] - 1; ax = x[at2 + 0] - x[at1 + 0]; ay = x[at2 + 1] - x[at1 + 1]; az = x[at2 + 2] - x[at1 + 2]; bx = x[at3 + 0] - x[at2 + 0]; by = x[at3 + 1] - x[at2 + 1]; bz = x[at3 + 2] - x[at2 + 2]; cx = x[at4 + 0] - x[at3 + 0]; cy = x[at4 + 1] - x[at3 + 1]; cz = x[at4 + 2] - x[at3 + 2]; if (dim == 4) { aw = x[at2 + 3] - x[at1 + 3]; bw = x[at3 + 3] - x[at2 + 3]; cw = x[at4 + 3] - x[at3 + 3]; # define DOT4(a,b,c,d,e,f,g,h) a*e + b*f + c*g + d*h ab = DOT4(ax, ay, az, aw, bx, by, bz, bw); bc = DOT4(bx, by, bz, bw, cx, cy, cz, cw); ac = DOT4(ax, ay, az, aw, cx, cy, cz, cw); aa = DOT4(ax, ay, az, aw, ax, ay, az, aw); bb = DOT4(bx, by, bz, bw, bx, by, bz, bw); cc = DOT4(cx, cy, cz, cw, cx, cy, cz, cw); } else { # define DOT3(a,b,c,d,e,f) a*d + b*e + c*f ab = DOT3(ax, ay, az, bx, by, bz); bc = DOT3(bx, by, bz, cx, cy, cz); ac = DOT3(ax, ay, az, cx, cy, cz); aa = DOT3(ax, ay, az, ax, ay, az); bb = DOT3(bx, by, bz, bx, by, bz); cc = DOT3(cx, cy, cz, cx, cy, cz); } uu = (aa * bb) - (ab * ab); vv = (bb * cc) - (bc * bc); uv = (ab * bc) - (ac * bb); den = 1.0 / sqrt(uu * vv); co = uv * den; co1 = 0.5 * co * den; a0x = -bc * bx + bb * cx; a0y = -bc * by + bb * cy; a0z = -bc * bz + bb * cz; b0x = ab * cx + bc * ax - 2. * ac * bx; b0y = ab * cy + bc * ay - 2. * ac * by; b0z = ab * cz + bc * az - 2. * ac * bz; c0x = ab * bx - bb * ax; c0y = ab * by - bb * ay; c0z = ab * bz - bb * az; a1x = 2. * uu * (-cc * bx + bc * cx); a1y = 2. * uu * (-cc * by + bc * cy); a1z = 2. * uu * (-cc * bz + bc * cz); b1x = 2. * uu * (bb * cx - bc * bx); b1y = 2. * uu * (bb * cy - bc * by); b1z = 2. * uu * (bb * cz - bc * bz); a2x = -2. * vv * (bb * ax - ab * bx); a2y = -2. * vv * (bb * ay - ab * by); a2z = -2. * vv * (bb * az - ab * bz); b2x = 2. * vv * (aa * bx - ab * ax); b2y = 2. * vv * (aa * by - ab * ay); b2z = 2. * vv * (aa * bz - ab * az); dd1x = (a0x - a2x * co1) * den; dd1y = (a0y - a2y * co1) * den; dd1z = (a0z - a2z * co1) * den; dd2x = (-a0x - b0x - (a1x - a2x - b2x) * co1) * den; dd2y = (-a0y - b0y - (a1y - a2y - b2y) * co1) * den; dd2z = (-a0z - b0z - (a1z - a2z - b2z) * co1) * den; dd3x = (b0x - c0x - (-a1x - b1x + b2x) * co1) * den; dd3y = (b0y - c0y - (-a1y - b1y + b2y) * co1) * den; dd3z = (b0z - c0z - (-a1z - b1z + b2z) * co1) * den; dd4x = (c0x - b1x * co1) * den; dd4y = (c0y - b1y * co1) * den; dd4z = (c0z - b1z * co1) * den; if (dim == 4) { a0w = -bc * bw + bb * cw; b0w = ab * cw + bc * aw - 2. * ac * bw; c0w = ab * bw - bb * aw; a1w = 2. * uu * (-cc * bw + bc * cw); b1w = 2. * uu * (bb * cw - bc * bw); a2w = -2. * vv * (bb * aw - ab * bw); b2w = 2. * vv * (aa * bw - ab * aw); dd1w = (a0w - a2w * co1) * den; dd2w = (-a0w - b0w - (a1w - a2w - b2w) * co1) * den; dd3w = (b0w - c0w - (-a1w - b1w + b2w) * co1) * den; dd4w = (c0w - b1w * co1) * den; } if (prm->Nhparm && a3[i] < 0) { /* here we will use a quadratic form for the improper torsion */ /* we are using the NHPARM variable in prmtop to trigger this */ /* WARNING: phi itself is here calculated from the first three coords-- --- may fail! */ /* Note: The following improper torsion code does not support 4D! */ co = co > 1.0 ? 1.0 : co; co = co < -1.0 ? -1.0 : co; phi = acos(co); /* now calculate sin(phi) because cos(phi) is symmetric, so we can decide between +-phi. */ ux = ay * bz - az * by; uy = az * bx - ax * bz; uz = ax * by - ay * bx; vx = by * cz - bz * cy; vy = bz * cx - bx * cz; vz = bx * cy - by * cx; dx1 = uy * vz - uz * vy; dy1 = uz * vx - ux * vz; dz1 = ux * vy - uy * vx; dx1 = DOT3(dx1, dy1, dz1, bx, by, bz); if (dx1 < 0.0) phi = -phi; delta = phi - Phase[atyp]; delta = delta > pi ? pi : delta; delta = delta < -pi ? -pi : delta; df = Pk[atyp] * delta; e = df * delta; e_tors += e; yy = sin(phi); /* Decide what expansion to use Check first for the "normal" expression, since it will be the most used the 0.001 value could be lowered for increased precision. This insures ~1e-05% error for sin(phi)=0.001 */ if (fabs(yy) > 0.001) { df = -2.0 * df / yy; } else { if (fabs(delta) < 0.10) { if (Phase[atyp] == 0.0) { df = -2.0 * Pk[atyp] * (1 + phi * phi / 6.0); } else { if (fabs(Phase[atyp]) == pi) { df = 2.0 * Pk[atyp] * (1 + delta * delta / 6.0); } } } else { if ((phi > 0.0 && phi < (pi / 2.0)) || (phi < 0.0 && phi > -pi / 2.0)) df = df * 1000.; else df = -df * 1000.; } } } else { multi_term: if (fabs(Phase[atyp] - 3.142) < 0.01) phase = -1.0; else phase = 1.0; ktors = Pk[atyp]; switch ((int) fabs(Pn[atyp])) { case 1: e = ktors * (1.0 + phase * co); df = phase * ktors; break; case 2: e = ktors * (1.0 + phase * (2.*co*co -1.)); df = phase*ktors*4.*co; break; case 3: cosq = co*co; e = ktors * (1.0 + phase * co*(4.*cosq -3.)); df = phase*ktors*(12.*cosq - 3.); break; case 4: cosq = co*co; e = ktors * (1.0 + phase * (8.*cosq*(cosq - 1.) + 1.)); df = phase*ktors*co*( 32.*cosq -16.); break; case 6: cosq = co*co; e = ktors * (1.0 + phase * (32.*cosq*cosq*cosq - 48.*cosq*cosq + 18.*cosq - 1.)); df = phase*ktors*co*( 192.*cosq*cosq - 192.*cosq + 36. ); break; default: fprintf(stderr, "bad value for Pn: %d %d %d %d %8.3f\n", at1, at2, at3, at4, Pn[atyp]); exit(1); } e_tors += e; } f[foff + at1 + 0] += df * dd1x; f[foff + at1 + 1] += df * dd1y; f[foff + at1 + 2] += df * dd1z; f[foff + at2 + 0] += df * dd2x; f[foff + at2 + 1] += df * dd2y; f[foff + at2 + 2] += df * dd2z; f[foff + at3 + 0] += df * dd3x; f[foff + at3 + 1] += df * dd3y; f[foff + at3 + 2] += df * dd3z; f[foff + at4 + 0] += df * dd4x; f[foff + at4 + 1] += df * dd4y; f[foff + at4 + 2] += df * dd4z; if (dim == 4) { f[foff + at1 + 3] += df * dd1w; f[foff + at2 + 3] += df * dd2w; f[foff + at3 + 3] += df * dd3w; f[foff + at4 + 3] += df * dd4w; } #ifdef PRINT_EPHI printf("%4d%4d%4d%4d%4d%8.3f\n", i + 1, at1, at2, at3, at4, e); printf("%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f\n", -df * dd1x, -df * dd1y, -df * dd1z, -df * dd2x, -df * dd2y, -df * dd2z, -df * dd3x, -df * dd3y); printf("%10.5f%10.5f%10.5f%10.5f\n", -df * dd3z, -df * dd4x, -df * dd4y, -df * dd4z); #endif if (Pn[atyp] < 0.0) { atyp++; goto multi_term; } } } return (e_tors); } #if 0 /*********************************************************************** EIMPHI() ************************************************************************/ /* * This function for calculation of improper torsion is included because * it has historically been included in eff.c. However, it has not been * modified to support 4D, and it is not called by any other function. */ static REAL_T eimphi( int nphi, int *a1, int *a2, int *a3, int *a4, int *atype, REAL_T *Pk, REAL_T *Peq, REAL_T *x, REAL_T *f ) { /* calculate improper dihedral energies and forces according to T.Schlick, J.Comput.Chem, vol 10, 7 (1989) with local variant (no delta funcion involved) If |sin(phi)|.gt.1e-03 use the angle directly V = kimp ( phi -impeq ) ^2 dV/dcos(phi) = -2*k/sin(phi) If |sin(phi)|.lt.1e-03 and (phi-Peq).lt.0.1 and cnseq(phi)=0. V = kimp (phi - impeq)^2 dV/dcos(phi) = -2*k*(1+phi*phi/6.) If |sin(phi)|.lt.1e-03 and (phi-Peq).lt.0.1 and cnseq(phi)=pi V = kimp (phi - impeq)^2 dV/dcos(phi) = 2*k*(1+phi*phi/6.) Now, if |sin(phi)|.lt.1e-03 but far from minimum, set sin(phi) to 1e-03 arbitrarily */ REAL_T e; REAL_T co, phi, den, co1; REAL_T uu, vv, uv; REAL_T ax, bx, cx; REAL_T ay, by, cy; REAL_T az, bz, cz; REAL_T ux, uy, uz; REAL_T vx, vy, vz; REAL_T dx1, dy1, dz1; REAL_T a0x, b0x, c0x; REAL_T a0y, b0y, c0y; REAL_T a0z, b0z, c0z; REAL_T a1x, b1x; REAL_T a1y, b1y; REAL_T a1z, b1z; REAL_T a2x, b2x; REAL_T a2y, b2y; REAL_T a2z, b2z; REAL_T dd1x, dd2x, dd3x, dd4x; REAL_T dd1y, dd2y, dd3y, dd4y; REAL_T dd1z, dd2z, dd3z, dd4z; REAL_T df, delta, pi, yy; REAL_T aa, bb, cc, ab, bc, ac; REAL_T e_imp; int i; int at1, at2, at3, at4, atyp; pi = 3.1415927; e_imp = 0.0; for (i = 0; i < nphi; i++) { at1 = a1[i]; at2 = a2[i]; at3 = abs(a3[i]); at4 = abs(a4[i]); atyp = atype[i] - 1; /* calculate distances */ ax = x[at2 + 0] - x[at1 + 0]; ay = x[at2 + 1] - x[at1 + 1]; az = x[at2 + 2] - x[at1 + 2]; bx = x[at3 + 0] - x[at2 + 0]; by = x[at3 + 1] - x[at2 + 1]; bz = x[at3 + 2] - x[at2 + 2]; cx = x[at4 + 0] - x[at3 + 0]; cy = x[at4 + 1] - x[at3 + 1]; cz = x[at4 + 2] - x[at3 + 2]; # define DOT(a,b,c,d,e,f) a*d + b*e + c*f ab = DOT(ax, ay, az, bx, by, bz); bc = DOT(bx, by, bz, cx, cy, cz); ac = DOT(ax, ay, az, cx, cy, cz); aa = DOT(ax, ay, az, ax, ay, az); bb = DOT(bx, by, bz, bx, by, bz); cc = DOT(cx, cy, cz, cx, cy, cz); uu = (aa * bb) - (ab * ab); vv = (bb * cc) - (bc * bc); uv = (ab * bc) - (ac * bb); den = 1.0 / sqrt(fabs(uu * vv)); co = uv * den; co = co > 1.0 ? 1.0 : co; co = co < -1.0 ? -1.0 : co; phi = acos(co); /* now calculate sin(phi) because cos(phi) is symmetric, so we can decide between +-phi. */ ux = ay * bz - az * by; uy = az * bx - ax * bz; uz = ax * by - ay * bx; vx = by * cz - bz * cy; vy = bz * cx - bx * cz; vz = bx * cy - by * cx; dx1 = uy * vz - uz * vy; dy1 = uz * vx - ux * vz; dz1 = ux * vy - uy * vx; dx1 = DOT(dx1, dy1, dz1, bx, by, bz); if (dx1 < 0.0) phi = -phi; delta = phi - Peq[atyp]; delta = delta > pi ? pi : delta; delta = delta < -pi ? -pi : delta; df = Pk[atyp] * delta; e = df * delta; e_imp += e; yy = sin(phi); /* Decide what expansion to use Check first for the "normal" expression, since it will be the most used the 0.001 value could be lowered for increased precision. This insures ~1e-05% error for sin(phi)=0.001 */ if (fabs(yy) > 0.001) { df = -2.0 * df / yy; } else { if (fabs(delta) < 0.10) { if (Peq[atyp] == 0.0) { df = -2.0 * Pk[atyp] * (1 + phi * phi / 6.0); } else { if (fabs(Peq[atyp]) == pi) df = 2.0 * Pk[atyp] * (1 + delta * delta / 6.0); } } else { if ((phi > 0.0 && phi < (pi / 2.0)) || (phi < 0.0 && phi > -pi / 2.0)) df = df * 1000.; else df = -df * 1000.; } } co1 = 0.5 * co * den; a0x = -bc * bx + bb * cx; a0y = -bc * by + bb * cy; a0z = -bc * bz + bb * cz; b0x = ab * cx + bc * ax - 2. * ac * bx; b0y = ab * cy + bc * ay - 2. * ac * by; b0z = ab * cz + bc * az - 2. * ac * bz; c0x = ab * bx - bb * ax; c0y = ab * by - bb * ay; c0z = ab * bz - bb * az; a1x = 2. * uu * (-cc * bx + bc * cx); a1y = 2. * uu * (-cc * by + bc * cy); a1z = 2. * uu * (-cc * bz + bc * cz); b1x = 2. * uu * (bb * cx - bc * bx); b1y = 2. * uu * (bb * cy - bc * by); b1z = 2. * uu * (bb * cz - bc * bz); a2x = -2. * vv * (bb * ax - ab * bx); a2y = -2. * vv * (bb * ay - ab * by); a2z = -2. * vv * (bb * az - ab * bz); b2x = 2. * vv * (aa * bx - ab * ax); b2y = 2. * vv * (aa * by - ab * ay); b2z = 2. * vv * (aa * bz - ab * az); dd1x = (a0x - a2x * co1) * den; dd1y = (a0y - a2y * co1) * den; dd1z = (a0z - a2z * co1) * den; dd2x = (-a0x - b0x - (a1x - a2x - b2x) * co1) * den; dd2y = (-a0y - b0y - (a1y - a2y - b2y) * co1) * den; dd2z = (-a0z - b0z - (a1z - a2z - b2z) * co1) * den; dd3x = (b0x - c0x - (-a1x - b1x + b2x) * co1) * den; dd3y = (b0y - c0y - (-a1y - b1y + b2y) * co1) * den; dd3z = (b0z - c0z - (-a1z - b1z + b2z) * co1) * den; dd4x = (c0x - b1x * co1) * den; dd4y = (c0y - b1y * co1) * den; dd4z = (c0z - b1z * co1) * den; f[at1 + 0] += df * dd1x; f[at1 + 1] += df * dd1y; f[at1 + 2] += df * dd1z; f[at2 + 0] += df * dd2x; f[at2 + 1] += df * dd2y; f[at2 + 2] += df * dd2z; f[at3 + 0] += df * dd3x; f[at3 + 1] += df * dd3y; f[at3 + 2] += df * dd3z; f[at4 + 0] += df * dd4x; f[at4 + 1] += df * dd4y; f[at4 + 2] += df * dd4z; } return (e_imp); } #endif /*********************************************************************** DOWNHEAP_PAIRS() ************************************************************************/ /* * The downheap function from Robert Sedgewick's "Algorithms in C++" p. 152, * corrected for the fact that Sedgewick indexes the heap array from 1 to n * whereas Java indexes the heap array from 0 to n-1. Note, however, that * the heap should be indexed conceptually from 1 to n in order that for * any node k the two children are found at nodes 2*k and 2*k+1. Move down * the heap, exchanging the node at position k with the larger of its two * children if necessary and stopping when the node at k is larger than both * children or the bottom of the heap is reached. Note that it is possible * for the node at k to have only one child: this case is treated properly. * A full exchange is not necessary because the variable 'v' is involved in * the exchanges. The 'while' loop has two exits: one for the case that * the bottom of the heap is hit, and another for the case that the heap * condition (the parent is greater than or equal to both children) is * satisfied somewhere in the interior of the heap. * * Used by the heapsort_pairs function which sorts the pair list arrays. * * Calling parameters are as follows: * * a - array of indices into the atomic coordinate array x * n - the number of items to be sorted * k - the exchange node (or element) */ static void downheap_pairs(int *a, int n, int k) { int j, v; v = a[k-1]; while (k <= n/2) { j = k + k; if ( (j < n) && (a[j-1] < a[j]) ) j++; if (v >= a[j-1]) break; a[k-1] = a[j-1]; k = j; } a[k-1] = v; } /*********************************************************************** HEAPSORT_PAIRS() ************************************************************************/ /* * The heapsort function from Robert Sedgewick's "Algorithms in C++" p. 156, * corrected for the fact that Sedgewick indexes the heap array from 1 to n * whereas Java indexes the heap array from 0 to n-1. Note, however, that * the heap should be indexed conceptually from 1 to n in order that for * any node k the two children are found at nodes 2*k and 2*k+1. In what * follows, the 'for' loop heaporders the array in linear time and the * 'while' loop exchanges the largest element with the last element then * repairs the heap. * * Calling parameters are as follows: * * a - array of indices into the atomic coordinate array x * n - the number of items to be sorted * * Used by the nblist function to sort the pair list arrays. */ static void heapsort_pairs(int *a, int n) { int k, v; for (k = n/2; k >= 1; k--) downheap_pairs(a, n, k); while (n > 1) { v = a[0]; a[0] = a[n-1]; a[n-1] = v; downheap_pairs(a, --n, 1); } } /*********************************************************************** DOWNHEAP_INDEX() ************************************************************************/ /* * The downheap function from Robert Sedgewick's "Algorithms in C++" p. 152, * corrected for the fact that Sedgewick indexes the heap array from 1 to n * whereas Java indexes the heap array from 0 to n-1. Note, however, that * the heap should be indexed conceptually from 1 to n in order that for * any node k the two children are found at nodes 2*k and 2*k+1. Move down * the heap, exchanging the node at position k with the larger of its two * children if necessary and stopping when the node at k is larger than both * children or the bottom of the heap is reached. Note that it is possible * for the node at k to have only one child: this case is treated properly. * A full exchange is not necessary because the variable 'v' is involved in * the exchanges. The 'while' loop has two exits: one for the case that * the bottom of the heap is hit, and another for the case that the heap * condition (the parent is greater than or equal to both children) is * satisfied somewhere in the interior of the heap. * * Used by the heapsort_index function which sorts the index arrays indirectly * by comparing components of the Cartesian coordinates. * * Calling parameters are as follows: * * a - array of indices into the atomic coordinate array x * n - the number of items to be sorted * k - the exchange node (or element) * x - the atomic coordinate array * p - the partition (x, y, z or w) on which sorting occurs */ static void downheap_index(int *a, int n, int k, REAL_T *x, int p) { int j, v; v = a[k-1]; while (k <= n/2) { j = k + k; if ( (j < n) && (x[dim * a[j-1] + p] < x[dim * a[j] + p]) ) j++; if (x[dim * v + p] >= x[dim * a[j-1] + p]) break; a[k-1] = a[j-1]; k = j; } a[k-1] = v; } /*********************************************************************** HEAPSORT_INDEX() ************************************************************************/ /* * The heapsort function from Robert Sedgewick's "Algorithms in C++" p. 156, * corrected for the fact that Sedgewick indexes the heap array from 1 to n * whereas Java indexes the heap array from 0 to n-1. Note, however, that * the heap should be indexed conceptually from 1 to n in order that for * any node k the two children are found at nodes 2*k and 2*k+1. In what * follows, the 'for' loop heaporders the array in linear time and the * 'while' loop exchanges the largest element with the last element then * repairs the heap. * * Calling parameters are as follows: * * a - array of indices into the atomic coordinate array x * n - the number of items to be sorted * x - the atomic coordinate array * p - the partition (x, y, z or w) on which sorting occurs * * Used by the nblist function to sort the xn, yn, zn, wn and on arrays. */ static void heapsort_index(int *a, int n, REAL_T *x, int p) { int k, v; for (k = n/2; k >= 1; k--) downheap_index(a, n, k, x, p); while (n > 1) { v = a[0]; a[0] = a[n-1]; a[n-1] = v; downheap_index(a, --n, 1, x, p); } } /*********************************************************************** BUILDKDTREE() ************************************************************************/ /* * Build the kd tree by recursively subdividing the atom number * arrays and adding nodes to the tree. Note that the arrays * are permuted cyclically as control moves down the tree in * order that sorting occur on x, y, z and (for 4D) w. Also, * if it is desired that the kd tree provide a partial atom * order, the sorting will occur on o, x, y, z and (for 4D) w. The * temporary array is provided for the copy and partition operation. * * Calling parameters are as follows: * * xn - x sorted array of atom numbers * yn - y sorted array of atom numbers * zn - z sorted array of atom numbers * wn - w sorted array of atom numbers * on - ordinal array of atom numbers * tn - temporary array for atom numbers * start - first element of array * end - last element of array * kdpptr - pointer to pointer to kd tree node next available for allocation * that - the node currently visited, the equivalent of 'this' in C++ * x - atomic coordinate array * p - the partition (x, y, z, w or o) on which sorting occurs */ #define SORT_ATOM_NUMBERS static void buildkdtree(int *xn, int *yn, int *zn, int *wn, int *on, int *tn, int start, int end, KDNODE_T **kdpptr, KDNODE_T *that, REAL_T *x, int p) { int i, middle, imedian, lower, upper; REAL_T median; /* * The partition cycles by dim unless SORT_ATOM_NUMBERS is defined, * in which case it cycles by dim+1. Note that if SORT_ATOM_NUMBERS * is defined and the partition equals zero, sorting will occur * on the ordinal atom number instead of the atom's cartesian * coordinate. */ #ifndef SORT_ATOM_NUMBERS p %= dim; #else p %= (dim + 1); #endif /* If only one element is passed to this function, add it to the tree. */ if (end == start) { that->n = xn[start]; } /* * Otherwise, if two elements are passed to this function, determine * whether the first element is the low child or the high child. Or, * if neither element is the low child, choose the second element to * be the high child. Allocate a new KDNODE_T and make it one or the * other of the children. */ else if (end == start + 1) { /* Check whether the first element is the low child. */ #ifdef SORT_ATOM_NUMBERS if ( ((p == 0) && (xn[start] < xn[end])) || ((p != 0) && (x[dim * xn[start] + p-1] < x[dim * xn[end] + p-1])) ) #else if (x[dim * xn[start] + p] < x[dim * xn[end] + p]) #endif { that->n = xn[end]; (*kdpptr)->n = xn[start]; (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->lo = (*kdpptr)++; } /* Check whether the second element is the low child. */ #ifdef SORT_ATOM_NUMBERS else if ( ((p == 0) && (xn[start] > xn[end])) || ((p != 0) && (x[dim * xn[start] + p-1] > x[dim * xn[end] + p-1])) ) #else if (x[dim * xn[start] + p] > x[dim * xn[end] + p]) #endif { that->n = xn[start]; (*kdpptr)->n = xn[end]; (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->lo = (*kdpptr)++; } /* Neither element is the low child so use the second as the high child. */ else { that->n = xn[start]; (*kdpptr)->n = xn[end]; (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->hi = (*kdpptr)++; } } /* Otherwise, more than two elements are passed to this function. */ else { /* * The middle element of the xn array is taken as the element about * which the yn and zn arrays will be partitioned. However, search * lower elements of the xn array to ensure that the p values of the * atomic coordinate array that correspond to these elements are indeed * less than the median value because they may be equal to it. This * approach is consistent with partitioning between < and >=. * * The search described above is not necessary if SORT_ATOM_NUMBERS is * defined and p==0 because in this case sorting occurs on the * ordinal atom number instead of the atomic coordinate, and the * ordinal atom numbers are all unique. */ middle = (start + end)/2; #ifdef SORT_ATOM_NUMBERS if (p == 0) { imedian = xn[middle]; } else { median = x[dim * xn[middle] + p-1]; for (i = middle - 1; i >= start; i--) { if (x[dim * xn[i] + p-1] < median) { break; } else { middle = i; } } } #else median = x[dim * xn[middle] + p]; for (i = middle - 1; i >= start; i--) { if (x[dim * xn[i] + p] < median) { break; } else { middle = i; } } #endif /* Store the middle element at this kd node. */ that->n = xn[middle]; /* * Scan the yn array in ascending order and compare the p value of * each corresponding element of the atomic coordinate array to the * median value. If the p value is less than the median value, copy * the element of the yn array into the lower part of the tn array. * If the p value is greater than or equal to the median value, copy * the element of the yn array into the upper part of the tn array. * The lower part of the tn array begins with the start index, and the * upper part of the tn array begins one element above the middle index. * At the end of this scan and copy operation, the tn array will have * been subdivided into three groups: (1) a group of indices beginning * with start and continuing up to but not including middle, which indices * point to atoms for which the p value is less than the median value; * (2) the middle index that has been stored in this node of the kd tree; * and (3) a group of indices beginning one address above middle and * continuing up to and including end, which indices point to atoms for * which the p value is greater than or equal to the median value. * * This approach preserves the relative heapsorted order of elements * of the atomic coordinate array that correspond to elements of the * yn array while those elements are partitioned about the p median. * * Note: when scanning the yn array, skip the element (i.e., the atom * number) that equals the middle element because that atom number has * been stored at this node of the kd-tree. */ lower = start - 1; upper = middle; for (i = start; i <= end; i++) { if (yn[i] != xn[middle]) { #ifdef SORT_ATOM_NUMBERS if ( ((p == 0) && (yn[i] < imedian)) || ((p != 0) && (x[dim * yn[i] + p-1] < median)) ) #else if (x[dim * yn[i] + p] < median) #endif { tn[++lower] = yn[i]; } else { tn[++upper] = yn[i]; } } } /* * All elements of the yn array between start and end have been copied * and partitioned into the tn array, so the yn array is available for * elements of the zn array to be copied and partitioned into the yn * array, in the same manner in which elements of the yn array were * copied and partitioned into the tn array. * * This approach preserves the relative heapsorted order of elements * of the atomic coordinate array that correspond to elements of the * zn array while those elements are partitioned about the p median. * * Note: when scanning the zn array, skip the element (i.e., the atom * number) that equals the middle element because that atom number has * been stored at this node of the kd-tree. */ lower = start - 1; upper = middle; for (i = start; i <= end; i++) { if (zn[i] != xn[middle]) { #ifdef SORT_ATOM_NUMBERS if ( ((p == 0) && (zn[i] < imedian)) || ((p != 0) && (x[dim * zn[i] + p-1] < median)) ) #else if (x[dim * zn[i] + p] < median) #endif { yn[++lower] = zn[i]; } else { yn[++upper] = zn[i]; } } } /* * Execute the following region of code if SORT_ATOM_NUMBERS is defined, * or if SORT_ATOM_NUMBERS is not defined and dim==4. */ #ifndef SORT_ATOM_NUMBERS if (dim == 4) #endif { /* * All elements of the zn array between start and end have been copied * and partitioned into the yn array, so the zn array is available for * elements of the wn array to be copied and partitioned into the zn * array, in the same manner in which elements of the zn array were * copied and partitioned into the yn array. * * This approach preserves the relative heapsorted order of elements * of the atomic coordinate array that correspond to elements of the * wn array while those elements are partitioned about the p median. * * Note: when scanning the wn array, skip the element (i.e., the atom * number) that equals the middle element because that atom number has * been stored at this node of the kd-tree. */ lower = start - 1; upper = middle; for (i = start; i <= end; i++) { if (wn[i] != xn[middle]) { #ifdef SORT_ATOM_NUMBERS if ( ((p == 0) && (wn[i] < imedian)) || ((p != 0) && (x[dim * wn[i] + p-1] < median)) ) #else if (x[dim * wn[i] + p] < median) #endif { zn[++lower] = wn[i]; } else { zn[++upper] = wn[i]; } } } } /* * Execute the following region of code if SORT_ATOM_NUMBERS is defined * and dim==4. */ #ifdef SORT_ATOM_NUMBERS if (dim == 4) { /* * All elements of the wn array between start and end have been copied * and partitioned into the zn array, so the wn array is available for * elements of the on array to be copied and partitioned into the wn * array, in the same manner in which elements of the wn array were * copied and partitioned into the zn array. * * This approach preserves the relative heapsorted order of elements * of the atomic coordinate array that correspond to elements of the * wn array while those elements are partitioned about the p median. * * Note: when scanning the on array, skip the element (i.e., the atom * number) that equals the middle element because that atom number has * been stored at this node of the kd-tree. */ lower = start - 1; upper = middle; for (i = start; i <= end; i++) { if (on[i] != xn[middle]) { if ( ((p == 0) && (on[i] < imedian)) || ((p != 0) && (x[dim * on[i] + p-1] < median)) ) { wn[++lower] = on[i]; } else { wn[++upper] = on[i]; } } } } #endif /* * Recurse down the lo branch of the tree if the lower group of * the tn array is non-null. Note permutation of the xn, yn, zn, wn, * on and tn arrays. In particular, xn was used for partitioning at * this level of the tree. At one level down the tree, yn (which * has been copied into tn) will be used for partitioning. At two * levels down the tree, zn (which has been copied into yn) will * be used for partitioning. If SORT_ATOM_NUMBERS is defined, or if * SORT_ATOM_NUMBERS is not defined and dim==4, at three levels down the * tree, wn (which has been copied into zn) will be used for partitoning. * At four levels down the tree, xn will be used for partitioning. * In this manner, partitioning cycles through xn, yn, zn and wn * at successive levels of the tree. * * Note that for 3D the wn array isn't allocated so don't permute it * cyclically along with the other arrays in the recursive call. * * Note also that if SORT_ATOM_NUMBERS isn't defined the on array isn't * allocated so don't permute it cyclically in the recursive call. */ if (lower >= start) { (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->lo = (*kdpptr)++; #ifndef SORT_ATOM_NUMBERS if (dim == 4) { buildkdtree(tn, yn, zn, xn, on, wn, start, lower, kdpptr, that->lo, x, p+1); } else { buildkdtree(tn, yn, xn, wn, on, zn, start, lower, kdpptr, that->lo, x, p+1); } #else if (dim == 4) { buildkdtree(tn, yn, zn, wn, xn, on, start, lower, kdpptr, that->lo, x, p+1); } else { buildkdtree(tn, yn, zn, xn, on, wn, start, lower, kdpptr, that->lo, x, p+1); } #endif } /* * Recurse down the hi branch of the tree if the upper group of * the tn array is non-null. Note permutation of the xn, yn, zn, wn * and tn arrays, as explained above for recursion down the lo * branch of the tree. * * Note that for 3D the wn array isn't allocated so don't permute it * cyclically along with the other arrays in the recursive call. * * Note also that if SORT_ATOM_NUMBERS isn't defined the on array isn't * allocated so don't permute it cyclically in the recursive call. */ if (upper > middle) { (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->hi = (*kdpptr)++; #ifndef SORT_ATOM_NUMBERS if (dim == 4) { buildkdtree(tn, yn, zn, xn, on, wn, middle+1, end, kdpptr, that->hi, x, p+1); } else { buildkdtree(tn, yn, xn, wn, on, zn, middle+1, end, kdpptr, that->hi, x, p+1); } #else if (dim == 4) { buildkdtree(tn, yn, zn, wn, xn, on, middle+1, end, kdpptr, that->hi, x, p+1); } else { buildkdtree(tn, yn, zn, xn, on, wn, middle+1, end, kdpptr, that->hi, x, p+1); } #endif } } } /*********************************************************************** SEARCHKDTREE() ************************************************************************/ /* * Walk the kd tree and generate the pair lists for the upper and lower * triangles. The pair lists are partially ordered in descending atom * number if SORT_ATOM_NUMBERS is defined. Descending order is preferred * by the subsequent heap sort of the pair lists that will occur if * HEAP_SORT_PAIRS is defined. * * Calling parameters are as follows: * * that - the node currently visited, equivalent to 'this' in C++ * x - atomic coordinate array * p - the partition (x, y, z, w or o) on which sorting occurs * q - the query atom number * loindexp - pointer to pair count array index for the lower triangle * upindexp - pointer to pair count array index for the upper triangle * lopairlist - the pair list for the lower triangle * uppairlist - the pair list for the upper triangle */ static void searchkdtree( KDNODE_T *that, REAL_T *x, int p, int q, int *loindexp, int *upindexp, int *lopairlist, int *uppairlist ) { REAL_T xij, yij, zij, wij, r2; /* * The partition cycles by dim unless SORT_ATOM_NUMBERS is defined, * in which case it cycles by dim+1. Note that if SORT_ATOM_NUMBERS * is defined and the partition equals zero, sorting has occured * on the ordinal atom number instead of the atom's cartesian * coordinate. */ #ifndef SORT_ATOM_NUMBERS p %= dim; #else p %= (dim + 1); #endif /* * Search the high branch of the tree if the atomic coordinate of the * query atom plus the cutoff radius is greater than or equal to the * atomic coordinate of the kd node atom. * * If SORT_ATOM_NUMBERS is defined and p==0, always search the high branch. */ #ifdef SORT_ATOM_NUMBERS if ( ((p == 0) && (that->hi != NULL)) || ((p != 0) && (that->hi != NULL) && (x[dim * q + p-1] + cut >= x[dim * that->n + p-1])) ) #else if ( (that->hi != NULL) && (x[dim * q + p] + cut >= x[dim * that->n + p]) ) #endif { searchkdtree(that->hi, x, p+1, q, loindexp, upindexp, lopairlist, uppairlist); } /* * If the query atom number does not equal the kd tree node atom number * and at least one of the two atoms is not frozen, calculate the interatomic * distance and add the kd tree node atom to one of the pair lists if the * distance is less than the cutoff distance. The atom belongs on the lower * triangle pair list if the atom number is less than the query node atom * number. Otherwise, it belongs on the upper triangle pair list. */ if ( ( q != that->n ) && ( !frozen[q] || !frozen[that->n] ) ){ xij = x[dim * q + 0] - x[dim * that->n + 0]; yij = x[dim * q + 1] - x[dim * that->n + 1]; zij = x[dim * q + 2] - x[dim * that->n + 2]; r2 = xij * xij + yij * yij + zij * zij; if (dim == 4) { wij = x[dim * q + 3] - x[dim * that->n + 3]; r2 += wij * wij; } if (r2 < cut2) { if (that->n < q) { lopairlist[*loindexp] = that->n; (*loindexp)++; } else { uppairlist[*upindexp] = that->n; (*upindexp)++; } } } /* * Search the low branch of the tree if the atomic coordinate of the * query atom minus the cutoff radius is less than the atomic coordinate * of the kd node atom. * * If SORT_ATOM_NUMBERS is defined and p==0, always search the low branch. */ #ifdef SORT_ATOM_NUMBERS if ( ((p == 0) && (that->lo != NULL)) || ((p != 0) && (that->lo != NULL) && (x[dim * q + p-1] - cut < x[dim * that->n + p-1])) ) #else if ( (that->lo != NULL) && (x[dim * q + p] - cut < x[dim * that->n + p]) ) #endif { searchkdtree(that->lo, x, p+1, q, loindexp, upindexp, lopairlist, uppairlist); } } /*********************************************************************** NBLIST() ************************************************************************/ /* * Create the non-bonded pairlist using a kd-tree. The kd-tree nodes * are allocated from the kdtree array. * * Calling parameters are as follows: * * lpears - the number of pairs on the lower triangle pair list * upears - the number of pairs on the upper triangle pair list * pearlist - the pair list, contiguous for the upper and lower triangles * x - atomic coordinate array * context_PxQ - the ScaLAPACK context * derivs - the derivative flag: -1 for 2nd derivs, 1 for 1st derivs * * This function returns the total number of pairs. */ static int nblist( int *lpears, int *upears, int **pearlist, REAL_T *x, int context_PxQ, int derivs) { int i, j, locnt, upcnt, totpair, numthreads, threadnum; int *xn, *yn, *zn, *wn, *on, *tn, *lopairlist, *uppairlist; KDNODE_T *kdtree, *kdptr, *root; #ifdef SCALAPACK int myrow, mycol, nprow, npcol, lotot, uptot; int *lopearlist, *uppearlist, *divblk, *modrow; #endif /* Square the cutoff distance for use in searchkdtree. */ cut2 = cut * cut; /* Allocate the kdtree array that must hold one node per atom. */ if ( (kdtree = (KDNODE_T *) malloc(prm->Natom*sizeof(KDNODE_T))) == NULL ) { printf("Error allocate kdnode array in nbtree!\n"); exit (0); } /* * Allocate, initialize and sort the arrays that hold the results of the * heapsort on x,y,z. These arrays are used as pointers (via array indices) * into the atomic coordinate array x. Allocate an additional temp array * so that the buildkdtree function can cycle through x,y,z. Also allocate * and sort an additional array for the w coordinate if dim==4, and * allocate an array for the ordinal atom number if SORT_ATOM_NUMBERS is * defined. * * The temp array and the ordinal atom array are not sorted. */ xn = ivector(0, prm->Natom); yn = ivector(0, prm->Natom); zn = ivector(0, prm->Natom); tn = ivector(0, prm->Natom); if (dim == 4) { wn = ivector(0, prm->Natom); } #ifdef SORT_ATOM_NUMBERS on = ivector(0, prm->Natom); #endif for (i = 0; i < prm->Natom; i++) { xn[i] = yn[i] = zn[i] = i; if (dim == 4) { wn[i] = i; } #ifdef SORT_ATOM_NUMBERS on[i] = i; #endif } heapsort_index(xn, prm->Natom, x, 0); heapsort_index(yn, prm->Natom, x, 1); heapsort_index(zn, prm->Natom, x, 2); if (dim == 4) { heapsort_index(wn, prm->Natom, x, 3); } /* * Build the kd tree. For 3D the wn array is ignored because it wasn't * allocated. When SORT_ATOM_NUMBERS is not defined the on array is * ignored because it wasn't allocated either. See the recursive calls * to the buildkdtree function from within that function to verify that * arrays that are ignored do not participate in the cyclic permutation * of arrays in the recursive calls. * * But if SORT_ATOM_NUMBERS is defined the xn, yn, zn, wn and on array order * is permuted in the non-recursive call to the buildkdtree function * (below) so that the sort at the root node of the tree occurs on the * ordinal atom number. */ kdptr = kdtree; root = kdptr++; root->lo = NULL; root->hi = NULL; #ifndef SORT_ATOM_NUMBERS buildkdtree(xn, yn, zn, wn, on, tn, 0, prm->Natom-1, &kdptr, root, x, 0); #else buildkdtree(on, xn, yn, zn, wn, tn, 0, prm->Natom-1, &kdptr, root, x, 0); #endif /* * Search the kd tree with each atom and record pairs into temporary * arrays for the lower and upper triangle pair lists for one atom. * Copy the temporary pair lists into a pair list array that is * allocated separately for each atom and that contains the lower * and upper triangle pair lists contiguously packed. * * The pairlist array is an array of pair list arrays. */ totpair = 0; #pragma omp parallel reduction (+: totpair) \ private (i, j, locnt, upcnt, lopairlist, uppairlist, threadnum, numthreads) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP, ScaLAPACK or MPI. These variables are not * used for single-threaded execution. * * If MPI is defined, the manner of assignment of the threadnum * and numthreads variables depends upon derivs, as follows. * * If derivs >= 0, the call to nblist is intended to build * a pair list for the first derivative calculation. Therefore, * the threadnum and numthreads variables are assigned in * a row cyclic manner that is required for the first derivative * calculation. This row cyclic approach makes optimal use of * the MPI tasks in that each task has the minimum number of * pair lists for the first derivative calculation. * * If derivs < 0 the call to nblist is intended to build a pair * list for the Born second derivative calculation. However, * the Born second derivative calculation is not parallelized * for MPI. Therefore, the pair list will be fully populated * for each MPI task. * * If OPENMP is defined, the threadnum and numthreads variables * are assigned in a row cyclic manner for both the first and * second derivative calculations. Thus for both calculations * each OpenMP thread has the minimum number of pair lists. * * If SCALAPACK is defined, the manner of assignment of the threadnum * and numthreads variables depends upon derivs variable, as follows. * * If derivs >= 0, the call to nblist is intended to build * a pair list for the first derivative calculation. Therefore, * the threadnum and numthreads variables are assigned in * a row cyclic manner that is required for the first derivative * calculation. As in the MPI case, each MPI task has the minimum * number of pair list for the first derivative calculation. * * If derivs < 0, the call to nblist is intended to build a pair * list either for the Born second derivative calculation or for * the nonbonded second derivative calculation. For the nonbonded * case, the calculation is not parallelized; therefore, the pair * list will be fully populated for each ScaLAPACK process. * * For the Born case, the threadnum and numthreads variables are * assigned from the process column and the number of process columns, * respectively. Each row of a particular column receives the same * pair list from the search of the kd tree, but thereafter the * pair list is culled using the process row and number of process * rows so that each row ultimately receives a unique pair list. * Thus, although for the Born second derivative calculation each process * column receives more pair lists than does each MPI task for the first * derivative calculation, the pair lists are shorter, and therefore the * total number of pairs that are processed by each process column is the * same as in the first derivative calculation, with the following. * caveat. For processes that do not lie of the process grid, the * process column is -1 which will result in no pair list for that process * due to the fact that the myroc function returns 0. Hence for the * second derivative calculation, fewer processes have pair lists than * for the first derivative calculation unless the total number of * MPI tasks is a square such as 1, 4, 9, et cetera. * */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #elif defined(SCALAPACK) if (derivs < 0) { if (gb) { blacs_gridinfo_(&context_PxQ, &nprow, &npcol, &myrow, &mycol); threadnum = mycol; numthreads = npcol; } else { threadnum = 0; numthreads = 1; } } else { threadnum = mytaskid; numthreads = numtasks; } #elif defined(MPI) if (derivs < 0) { threadnum = 0; numthreads = 1; } else { threadnum = mytaskid; numthreads = numtasks; } #endif /* * Allocate the temporary arrays for the lower and upper triangles. * These arrays must be large enough to hold a maximum size pair * list for one atom. For the ScaLAPACK second derivatives an * extra set of temporary arrays is used from which the final pair * lists are culled. * * Also, for ScaLAPACK allocate and initialize lookup tables * for division and modulus operations. */ lopairlist = ivector( 0, prm->Natom ); uppairlist = ivector( 0, prm->Natom ); #ifdef SCALAPACK if (derivs < 0 && gb) { lopearlist = ivector( 0, prm->Natom ); uppearlist = ivector( 0, prm->Natom ); divblk = ivector( 0, prm->Natom ); modrow = ivector( 0, prm->Natom ); for (i = 0; i < prm->Natom; i++) { divblk[i] = i / BLOCKSIZE; modrow[i] = i % nprow; } } #endif /* * Search the kd tree with each atom. If no pair list array * has been allocated and there are pair atoms, allocate a * pair list array. If a pair list array has been allocated * but it is too small for the number of pair atoms, deallocate * the pair list array and allocate a larger array. If it is * at least 33% larger than is necessary for the pair atoms, * deallocate it and allocate a smaller pair list array. Copy the * lower and upper triangle pair lists into the pair list array, * packed contiguously with the lower triangle pair list followed * by the upper triangle pair list. * * Explicitly assign threads to loop indices for the following loop, * in a manner equivalent to (static, N) scheduling with OpenMP, * and identical to the manner in which threads are assigned in * nbond, egb and egb2. * * There is an implied barrier end of this OpenMP parallel region. * Because the following loop is the only loop in this parallel * region, there is no need for an explicit barrier. Furthermore, * all OpenMP parallelized loops that use the pair list also use * loop index to thread mapping that is identical to what is used for * this loop. Hence, no race condition would exist even if OpenMP * threads were not synchronized at the end of this parallel region * because each thread constructs the pair list that it uses thereafter. * This same argument applies to MPI tasks: no synchronization is * necessary. */ for (i = 0; i < prm->Natom; i++) { #if defined(OPENMP) || defined(MPI) || defined(SCALAPACK) if ( !myroc(i, BLOCKSIZE, numthreads, threadnum) ) continue; #endif #ifdef SCALAPACK if (derivs < 0 && gb) { lotot = uptot = 0; searchkdtree(root, x, 0, i, &lotot, &uptot, lopearlist, uppearlist); } else { locnt = upcnt = 0; searchkdtree(root, x, 0, i, &locnt, &upcnt, lopairlist, uppairlist); } #else locnt = upcnt = 0; searchkdtree(root, x, 0, i, &locnt, &upcnt, lopairlist, uppairlist); #endif /* * If SORT_ATOM_NUMBERS is defined, the upper and lower triangle * pair lists are partially sorted by ordinal atom number using * the kd tree. * * If HEAP_SORT_PAIRS is defined, sort the upper and lower triangle * pair lists using heap sort. * * If the pair lists are sorted by ordinal atom number using the * kd tree, the subsequent heap sort of the pair lists is quicker, * but the kd tree sort is not necessary. */ #define HEAP_SORT_PAIRS #ifdef HEAP_SORT_PAIRS #ifdef SCALAPACK if (derivs < 0 && gb) { heapsort_pairs(lopearlist, lotot); heapsort_pairs(uppearlist, uptot); } else { heapsort_pairs(lopairlist, locnt); heapsort_pairs(uppairlist, upcnt); } #else heapsort_pairs(lopairlist, locnt); heapsort_pairs(uppairlist, upcnt); #endif #endif /* * For the ScaLAPACK second derivatives cull the pair lists * by copying to the final pair lists only those atoms that * are active in a particular process row. Use the lookup * tables for a faster form of calls to myroc of the form: * * myroc(lopearlist[j], BLOCKSIZE, npcol, mycol) */ #ifdef SCALAPACK if (derivs < 0 && gb) { locnt = 0; for (j = 0; j < lotot; j++) { if ( myrow >= 0 && modrow[divblk[lopearlist[j]]] == myrow ) { lopairlist[locnt++] = lopearlist[j]; } } upcnt = 0; for (j = 0; j < uptot; j++) { if ( myrow >= 0 && modrow[divblk[uppearlist[j]]] == myrow ) { uppairlist[upcnt++] = uppearlist[j]; } } } #endif if ( ( pearlist[i] == NULL ) && ( locnt + upcnt > 0 ) ) { pearlist[i] = ivector( 0, locnt + upcnt ); } else if ( ( pearlist[i] != NULL ) && ( ( locnt + upcnt > lpears[i] + upears[i] ) || ( 4*(locnt + upcnt) < 3*(lpears[i] + upears[i]) ) ) ) { free_ivector( pearlist[i], 0, lpears[i] + upears[i] ); pearlist[i] = ivector( 0, locnt + upcnt ); } lpears[i] = locnt; upears[i] = upcnt; for (j = 0; j < locnt; j++) { pearlist[i][j] = lopairlist[j]; } for (j = 0; j < upcnt; j++) { pearlist[i][locnt + j] = uppairlist[j]; } totpair += locnt + upcnt; } /* * Deallocate the temporary arrays for the lower and upper triangles. * For ScaLAPACK deallocate the addtional temporary arrays as well as * the lookup tables. */ free_ivector( lopairlist, 0, prm->Natom ); free_ivector( uppairlist, 0, prm->Natom ); #ifdef SCALAPACK if (derivs < 0 && gb) { free_ivector( lopearlist, 0, prm->Natom ); free_ivector( uppearlist, 0, prm->Natom ); free_ivector( divblk, 0, prm->Natom ); free_ivector( modrow, 0, prm->Natom ); } #endif } /* Free the temporary arrays. */ free(kdtree); free_ivector(xn, 0, prm->Natom); free_ivector(yn, 0, prm->Natom); free_ivector(zn, 0, prm->Natom); free_ivector(tn, 0, prm->Natom); if (dim == 4) { free_ivector(wn, 0, prm->Natom); } #ifdef SORT_ATOM_NUMBERS free_ivector(on, 0, prm->Natom); #endif return totpair; } /*********************************************************************** NBOND() ************************************************************************/ /* * Calculate the non-bonded energy and first derivatives. * This function is complicated by the fact that it must * process two forms of pair lists: the 1-4 pair list and * the non-bonded pair list. The non-bonded pair list * must be modified by the excluded atom list whereas the * 1-4 pair list is used unmodified. Also, the non-bonded * pair list comprises lower and upper triangles whereas * the 1-4 pair list comprises an upper triangle only. * * Calling parameters are as follows: * * lpears - the number of pairs on the lower triangle pair list * upears - the number of pairs on the upper trianble pair list * pearlist - either the 1-4 pair list or the non-bonded pair list * N14 - set to 0 for the non-bonded pair list, 1 for the 1-4 pair list * x - the atomic coordinate array * f - the gradient vector * enb - Van der Waals energy return value, passed by reference * eel - Coulombic energy return value, passed by reference * enbfac - scale factor for Van der Waals energy * eelfac - scale factor for Coulombic energy */ static int nbond( int *lpears, int *upears, int **pearlist, int N14, REAL_T *x, REAL_T *f, REAL_T *enb, REAL_T *eel, REAL_T enbfac, REAL_T eelfac ) { int i, j, jn, ic, npr, lpair, iaci, foff, threadnum, numthreads; int *iexw; REAL_T dumx, dumy, dumz, dumw, cgi, r2inv, df2, r6, r10, f1, f2; REAL_T dedx, dedy, dedz, dedw, df, enbfaci, eelfaci, evdw, elec; REAL_T xi, yi, zi, wi, xij, yij, zij, wij, r, r2; REAL_T dis, kij, d0, diff, rinv, rs, rssq, eps1, epsi, cgijr, pow; int nhbpair, ibig, isml; #define SIG 0.3 #define DIW 78.0 #define C1 38.5 evdw = 0.; elec = 0.; enbfaci = 1. / enbfac; eelfaci = 1. / eelfac; #pragma omp parallel reduction (+: evdw, elec) \ private (i, j, iexw, npr, iaci, \ xi, yi, zi, wi, xij, yij, zij, wij, dumx, dumy, dumz, dumw, \ cgi, jn, r2, r2inv, r, rinv, rs, rssq, pow, \ eps1, epsi, cgijr, df2, ic, r6, f2, f1, df, dis, d0, kij, \ diff, ibig, isml, dedx, dedy, dedz, dedw, r10, \ threadnum, numthreads, foff) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute an offset into the gradient array for this thread, * but only if OPENMP is defined. */ #ifdef OPENMP foff = dim * prm->Natom * threadnum; #else foff = 0; #endif /* * Allocate and initialize the iexw array used for skipping excluded * atoms. Note that because of the manner in which iexw is used, it * is necessary to initialize it before only the first iteration of * the following loop. */ iexw = ivector(-1, prm->Natom); for (i = -1; i < prm->Natom; i++) { iexw[i] = -1; } /* * Loop over all atoms i except for the final atom i. * * Explicitly assign threads to loop indices for the following loop, * in a manner equivalent to (static, N) scheduling with OpenMP, and * identical to the manner in which threads are assigned in nblist. * * Synchronization of OpenMP threads will occur following this loop * because the parallel region ends after this loop. Following * synchronization, a reduction of the sumdeijda array will be * performed. * * Synchronization of MPI tasks will occur via the MPI_Allreduce * function that is called from within mme34. */ for (i = 0; i < prm->Natom - 1; i++) { #if defined(OPENMP) || defined(MPI) || defined(SCALAPACK) if ( !myroc(i, BLOCKSIZE, numthreads, threadnum) ) continue; #endif /* Check whether there are any atoms j on the pair list of atom i. */ npr = upears[i]; if (npr <= 0) continue; iaci = prm->Ntypes * (prm->Iac[i] - 1); dumx = dumy = dumz = 0.0; xi = x[dim * i + 0]; yi = x[dim * i + 1]; zi = x[dim * i + 2]; if (dim == 4) { dumw = 0.0; wi = x[dim * i + 3]; } cgi = eelfaci * prm->Charges[i]; /* * Expand the excluded list into the iexw array by storing i * at array address j. */ for (j = 0; j < prm->Iblo[i]; j++) { iexw[IexclAt[i][j] - 1] = i; } /* * If the 'N14' calling parameter is clear, use the beginning * address of the upper triangle pair list, which happens * to be the number of atoms on the lower triangle pair list. * If the 'N14' calling parameter is set, the beginning * address is zero because no lower triangle pair list is * used for the N14 interactions. */ if (N14 == 0) { lpair = lpears[i]; } else { lpair = 0; } /* Select atoms j from the pair list. Non-graceful error handling. */ for (jn = 0; jn < npr; jn++) { if (pearlist[i] == NULL) { printf("NULL pair list entry in nbond, taskid = %d\n", mytaskid); fflush(stdout); } j = pearlist[i][lpair + jn]; /* * If the 'N14' calling parameter is clear, check whether * this i,j pair is exempted by the excluded atom list. */ if (N14 != 0 || iexw[j] != i) { xij = xi - x[dim * j + 0]; yij = yi - x[dim * j + 1]; zij = zi - x[dim * j + 2]; r2 = xij * xij + yij * yij + zij * zij; if (dim == 4) { wij = wi - x[dim * j + 3]; r2 += wij * wij; } r2inv = 1.0 / r2; r = sqrt(r2); rinv = r * r2inv; /* Calculate the energy and derivatives according to dield. */ if (dield == -3) { /* special code Ramstein & Lavery dielectric, 94 force field */ rs = SIG * r; rssq = rs * rs; pow = exp(-rs); eps1 = rssq + rs + rs + 2.0; epsi = 1.0 / (DIW - C1 * pow * eps1); cgijr = cgi * prm->Charges[j] * rinv * epsi; elec += cgijr; df2 = -cgijr * (1.0 + C1 * pow * rs * rssq * epsi); ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6 = r2inv * r2inv * r2inv; f2 = prm->Cn2[ic] * r6; f1 = prm->Cn1[ic] * r6 * r6; evdw += (f1 - f2) * enbfaci; df = (df2 + (6.0 * f2 - 12.0 * f1) * enbfaci) * rinv; } else { df = df2 * rinv; } } else if (dield == -4) { /* distance-dependent dielectric code, 94 ff */ /* epsilon = r */ rs = cgi * prm->Charges[j] * r2inv; df2 = -2.0 * rs; elec += rs; ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6 = r2inv * r2inv * r2inv; f2 = prm->Cn2[ic] * r6; f1 = prm->Cn1[ic] * r6 * r6; evdw += (f1 - f2) * enbfaci; df = (df2 + (6.0 * f2 - 12.0 * f1) * enbfaci) * rinv; } else { df = df2 * rinv; } } else if (dield == -5) { /* non-bonded term from yammp */ dis = r; ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; d0 = prm->Cn2[ic]; if (dis < d0) { kij = prm->Cn1[ic]; diff = dis - d0; evdw += kij * diff * diff; df = 2.0 * kij * diff; } else { df = 0.0; } } else { /* * Code for various dielectric models. * The df2 variable should hold r(dV/dr). */ if (dield == 0) { /* epsilon = r */ rs = cgi * prm->Charges[j] * r2inv; df2 = -2.0 * rs; elec += rs; } else if (dield == 1) { /* epsilon = 1 */ rs = cgi * prm->Charges[j] * rinv; df2 = -rs; elec += rs; } else if (dield == -2) { /* Ramstein & Lavery dielectric, PNAS 85, 7231 (1988). */ rs = SIG * r; rssq = rs * rs; pow = exp(-rs); eps1 = rssq + rs + rs + 2.0; epsi = 1.0 / (DIW - C1 * pow * eps1); cgijr = cgi * prm->Charges[j] * rinv * epsi; elec += cgijr; df2 = -cgijr * (1.0 + C1 * pow * rs * rssq * epsi); } /* Calculate either Van der Waals or hydrogen bonded term. */ ic = prm->Cno[iaci + prm->Iac[j] - 1]; if (ic > 0 || enbfac != 1.0) { if (ic > 0) { ic--; } else { ibig = prm->Iac[i] > prm->Iac[j] ? prm->Iac[i] : prm->Iac[j]; isml = prm->Iac[i] > prm->Iac[j] ? prm->Iac[j] : prm->Iac[i]; ic = ibig * (ibig - 1) / 2 + isml - 1; } r6 = r2inv * r2inv * r2inv; f2 = prm->Cn2[ic] * r6; f1 = prm->Cn1[ic] * r6 * r6; evdw += (f1 - f2) * enbfaci; df = (df2 + (6.0 * f2 - 12.0 * f1) * enbfaci) * rinv; #if 0 if( enbfac != 1.0 ) nb14 += (f1-f2) * enbfaci; #endif } else { ic = -ic - 1; r10 = r2inv * r2inv * r2inv * r2inv * r2inv; f2 = prm->HB10[ic] * r10; f1 = prm->HB12[ic] * r10 * r2inv; evdw += (f1 - f2) * enbfaci; df = (df2 + (10.0 * f2 - 12.0 * f1) * enbfaci) * rinv; nhbpair++; #if 0 hbener += (f1-f2) * enbfaci; #endif } } /* * Divide by r here instead dividing the derivatives by r, * and update the gradient for atom j. */ df *= rinv; dedx = df * xij; dedy = df * yij; dedz = df * zij; dumx += dedx; dumy += dedy; dumz += dedz; f[foff + dim * j + 0] -= dedx; f[foff + dim * j + 1] -= dedy; f[foff + dim * j + 2] -= dedz; if (dim == 4) { dedw = df * wij; dumw += dedw; f[foff + dim * j + 3] -= dedw; } } } /* For atom i, the gradient is updated in the i-loop only. */ f[foff + dim * i + 0] += dumx; f[foff + dim * i + 1] += dumy; f[foff + dim * i + 2] += dumz; if (dim == 4) { f[foff + dim * i + 3] += dumw; } } /* Deallocate the iexw array within this potentially parallel region. */ free_ivector(iexw, -1, prm->Natom); } /* Return evdw and elec through by-reference calling parameters. */ *enb = evdw; *eel = elec; return (0); } /*********************************************************************** EGB() ************************************************************************/ /* * Calculate the generalized Born energy and first derivatives. * * Calling parameters are as follows: * * lpears - the number of pairs on the lower triangle pair list * upears - the number of pairs on the upper trianble pair list * pearlist - the pair list, contiguous for the upper and lower triangles * x - input: the atomic (x,y,z) coordinates * f - updated: the gradient vector * fs - input: overlap parameters * rborn - input: atomic radii * q - input: atomic charges * kappa - input: inverse of the Debye-Huckel length * diel_ext - input: solvent dielectric constant * enb - updated: Lennard-Jones energy * eelt - updated: gas-phase electrostatic energy * freevectors - if !=0 free the static vectors and return */ static REAL_T egb( int *lpears, int *upears, int **pearlist, REAL_T *x, REAL_T *f, REAL_T *fs, REAL_T *rborn, REAL_T *q, REAL_T *kappa, REAL_T *diel_ext, REAL_T *enb, REAL_T *eelt, INT_T freevectors ) #define BOFFSET 0.09 #define KSCALE 0.73 #define PIx4 12.5663706143591724639918 #define PIx2 6.2831853071795862319959 #define PIx1 3.1415926535897931159979 { #if defined(MPI) || defined(SCALAPACK) int ierror; static REAL_T *reductarr=NULL; #endif static REAL_T *reff=NULL, *sumdeijda=NULL, *psi=NULL; int i, i34, j, j34, k, threadnum, numthreads, maxthreads, foff, soff; int npairs, ic, iaci; int *iexw; size_t n; REAL_T epol, dielfac, qi, qj, qiqj, fgbi, fgbk, rb2, expmkf; REAL_T elec, evdw, sumda, daix, daiy, daiz, daiw; REAL_T xi, yi, zi, wi, xij, yij, zij, wij; REAL_T dedx, dedy, dedz, dedw, de; REAL_T dij1i, dij3i, temp1; REAL_T qi2h, qid2h, datmp; REAL_T theta, ri1i, dij2i; REAL_T dij, sumi; REAL_T eel, f6, f12, rinv, r2inv, r6inv; REAL_T r2, ri, rj, sj, sj2, thi; REAL_T uij, efac, temp4, temp5, temp6; /* LCPO stuff follows */ int count, count2, icount; REAL_T si, sumAij, sumAjk, sumAijAjk, rij, tmpaij, Aij, dAijddij; REAL_T sumdAijddijdxi, sumdAijddijdyi, sumdAijddijdzi, sumdAijddijdwi; REAL_T sumdAijddijdxiAjk, sumdAijddijdyiAjk, sumdAijddijdziAjk, sumdAijddijdwiAjk; REAL_T dAijddijdxj, dAijddijdyj, dAijddijdzj, dAijddijdwj; REAL_T sumdAjkddjkdxj, sumdAjkddjkdyj, sumdAjkddjkdzj, sumdAjkddjkdwj, p3p4Aij; REAL_T xk, yk, zk, wk, rjk2, djk1i, rjk, vdw2dif, tmpajk, Ajk, sumAjk2, dAjkddjk; REAL_T dAjkddjkdxj, dAjkddjkdyj, dAjkddjkdzj, dAjkddjkdwj; REAL_T lastxj, lastyj, lastzj, lastwj; REAL_T dAidxj, dAidyj, dAidzj, dAidwj, Ai, dAidxi, dAidyi, dAidzi, dAidwi; REAL_T totsasa; REAL_T xj, yj, zj, wj; REAL_T dumbo,tmpsd; REAL_T rgbmax1i, rgbmax2i, rgbmaxpsmax2; /*FGB taylor coefficients follow */ /* from A to H : */ /* 1/3 , 2/5 , 3/7 , 4/9 , 5/11 */ /* 4/3 , 12/5 , 24/7 , 40/9 , 60/11 */ #define TA 0.33333333333333333333 #define TB 0.4 #define TC 0.42857142857142857143 #define TD 0.44444444444444444444 #define TDD 0.45454545454545454545 #define TE 1.33333333333333333333 #define TF 2.4 #define TG 3.42857142857142857143 #define TH 4.44444444444444444444 #define THH 5.45454545454545454545 /* * Determine the size of the sumdeijda array. If OPENMP is defined, * a copy of this array must be allocated for each thread; otherwise * only one copy is allocated. */ #ifdef OPENMP maxthreads = omp_get_max_threads(); #else maxthreads = 1; #endif n = (size_t)prm->Natom; /* * If freevectors != 0, deallocate the static arrays that have been * previously allocated and return. */ if (freevectors != 0) { if (reff != NULL) free_vector(reff, 0, n); reff = NULL; if (sumdeijda != NULL) free_vector(sumdeijda, 0, maxthreads*n); sumdeijda = NULL; if (psi != NULL) free_vector(psi, 0, n); psi = NULL; #if defined(MPI) || defined(SCALAPACK) if (reductarr != NULL) free_vector(reductarr, 0, n); reductarr = NULL; #endif return (0.0); } /* * Smooth "cut-off" in calculating GB effective radii. * Implementd by Andreas Svrcek-Seiler and Alexey Onufriev. * The integration over solute is performed up to rgbmax and includes * parts of spheres; that is an atom is not just "in" or "out", as * with standard non-bonded cut. As a result, calclated effective * radii are less than rgbmax. This saves time, and there is no * discontinuity in dReff/drij. * * Only the case rgbmax > 5*max(sij) = 5*fsmax ~ 9A is handled; this is * enforced in mdread(). Smaller values would not make much physical * sense anyway. * * Note: rgbmax must be less than or equal to cut so that the pairlist * generated from cut may be applied to calculation of the effective * radius and its derivatives. */ if (rgbmax > cut) { printf("Error in egb: rgbmax = %f is greater than cutoff = %f\n", rgbmax, cut); exit(1); } rgbmax1i = 1.0/rgbmax; rgbmax2i = rgbmax1i*rgbmax1i; rgbmaxpsmax2 = (rgbmax+prm->Fsmax)*(rgbmax+prm->Fsmax); /* Allocate some static arrays if they have not been allocated already. */ if (reff == NULL) reff = vector(0, n); if (sumdeijda == NULL) sumdeijda = vector(0, maxthreads*n); if ( (psi == NULL) && (gb==2 || gb==5) ) psi = vector(0, n); #if defined(MPI) || defined(SCALAPACK) if (reductarr == NULL) reductarr = vector(0, n); #endif count = 0; totsasa = 0.0; if (gb_debug) printf("Effective Born radii:\n"); /* * Get the "effective" Born radii via the approximate pairwise method * Use Eqs 9-11 of Hawkins, Cramer, Truhlar, J. Phys. Chem. 100:19824 * (1996). * * This computation is not executed in parallel for the case of gbsa==1 * due to the loop-carried dependence of the 'count' variable. However, * an apparent bug in the Sun implementation of OpenMP may cause this * computation to execute more slowly than in serial mode in the case * where an 'if (gbsa != 1)' clause is added to the following pragma. * To avoid this problem, the code is replicated verbatim and selected * with an 'if (gbsa != 1)' statement. When gbsa != 1 the pragma version * is selected; otherwise, the non-pragma version is selected. */ if (gbsa != 1) { /* * For MPI or ScaLAPACK, initialize all elements of the reff array. * Although each task will calculate only a subset of the elements, * a reduction is used to combine the results from all tasks. * If a gather were used instead of a reduction, no initialization * would be necessary. */ #if defined(MPI) || defined(SCALAPACK) for (i = 0; i < prm->Natom; i++) { reff[i] = 0.0; } #endif #pragma omp parallel \ private (i, xi, yi, zi, wi, ri, ri1i, sumi, j, k, xij, yij, zij, wij, \ r2, dij1i, dij, sj, sj2, uij, dij2i, tmpsd, dumbo, theta, \ threadnum, numthreads) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Loop over all atoms i. * * Explicitly assign threads to loop indices for the following loop, * in a manner equivalent to (static, N) scheduling with OpenMP, and * identical to the manner in which threads are assigned in nblist. * * The reff array is written in the following loops. It is necessary to * synchronize the OpenMP threads or MPI tasks that execute these loops * following loop execution so that a race condition does not exist for * reading the reff array before it is written. Even if all subsequent * loops use loop index to thread or task mapping that is identical to * that of the following loop, elements of the reff array are indexed by * other loop indices, so synchronization is necessary. * * OpenMP synchronization is accomplished by the implied barrier * at the end of this parallel region. MPI synchronization is * accomplished by MPI_Allreduce. */ for (i = 0; i < prm->Natom; i++) { #if defined(OPENMP) || defined(MPI) || defined(SCALAPACK) if ( !myroc(i, BLOCKSIZE, numthreads, threadnum) ) continue; #endif xi = x[dim * i ]; yi = x[dim * i + 1]; zi = x[dim * i + 2]; if (dim == 4) { wi = x[dim * i + 3]; } ri = rborn[i] - BOFFSET; ri1i = 1. / ri; sumi = 0.0; /* Select atom j from the pair list. Non-graceful error handling. */ for (k = 0; k < lpears[i] + upears[i]; k++) { if (pearlist[i] == NULL) { printf("NULL pair list entry in egb loop 1, taskid = %d\n", mytaskid); fflush(stdout); } j = pearlist[i][k]; xij = xi - x[dim * j ]; yij = yi - x[dim * j + 1]; zij = zi - x[dim * j + 2]; r2 = xij * xij + yij * yij + zij * zij; if (dim == 4) { wij = wi - x[dim * j + 3]; r2 += wij * wij; } if ( r2 > rgbmaxpsmax2 ) continue; dij1i = 1.0 / sqrt(r2); dij = r2 * dij1i; sj = fs[j] * (rborn[j] - BOFFSET); sj2 = sj * sj; /* * ---following are from the Appendix of Schaefer and Froemmel, * JMB 216:1045-1066, 1990; Taylor series expansion for d>>s * is by Andreas Svrcek-Seiler; smooth rgbmax idea is from * Andreas Svrcek-Seiler and Alexey Onufrie. */ if( dij > rgbmax + sj ) continue; if ((dij > rgbmax - sj)) { uij = 1./(dij -sj); sumi -= 0.125 * dij1i * (1.0 + 2.0 * dij *uij + rgbmax2i * (r2 - 4.0 * rgbmax * dij - sj2) + 2.0 * log((dij-sj)*rgbmax1i)); } else if (dij >4.0*sj) { dij2i = dij1i*dij1i; tmpsd = sj2*dij2i; dumbo = TA+tmpsd*(TB+tmpsd*(TC+tmpsd*(TD+tmpsd*TDD))); sumi -= sj*tmpsd*dij2i*dumbo; } else if (dij > ri + sj) { sumi -= 0.5 * (sj / (r2 - sj2) + 0.5 * dij1i * log((dij - sj) / (dij + sj))); } else if (dij > fabs(ri - sj)) { theta = 0.5 * ri1i * dij1i * (r2 + ri * ri - sj2); uij = 1. / (dij + sj); sumi -= 0.25 * (ri1i * (2. - theta) - uij + dij1i * log(ri * uij)); } else if (ri < sj) { sumi -= 0.5 * (sj / (r2 - sj2) + 2. * ri1i + 0.5 * dij1i * log((sj - dij) / (sj + dij))); } } if( gb==1 ) { /* "standard" (HCT) effective radii: */ reff[i] = 1.0/(ri1i + sumi); if( reff[i] < 0.0 ) reff[i] = 30.0; } else { /* "gbao" formulas: */ psi[i] = -ri*sumi; reff[i] = 1.0 / (ri1i - tanh( (gbalpha - gbbeta*psi[i] + gbgamma*psi[i]*psi[i]) *psi[i])/rborn[i] ); } if (gb_debug) printf("%d\t%15.7f\t%15.7f\n", i + 1, rborn[i], reff[i]); } } /* The MPI synchronization is accomplished via reduction of the reff array. */ #if defined(MPI) || defined(SCALAPACK) ierror = MPI_Allreduce(reff, reductarr, prm->Natom, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (ierror != MPI_SUCCESS) { printf("Error in egb reff reduction, error = %d mytaskid = %d\n", ierror, mytaskid); } for (i = 0; i < prm->Natom; i++) { reff[i] = reductarr[i]; } #endif } else { /* * Here is the second replication of the code, always executed serially. * Loop over all atoms i. */ for (i = 0; i < prm->Natom; i++) { xi = x[dim * i ]; yi = x[dim * i + 1]; zi = x[dim * i + 2]; if (dim == 4) { wi = x[dim * i + 3]; } ri = rborn[i] - BOFFSET; ri1i = 1. / ri; sumi = 0.0; /* Select atom j from the pair list. Non-graceful error handling. */ for (k = 0; k < lpears[i] + upears[i]; k++) { if (pearlist[i] == NULL) { printf("NULL pair list entry in egb loop 2, taskid = %d\n", mytaskid); fflush(stdout); } j = pearlist[i][k]; xij = xi - x[dim * j ]; yij = yi - x[dim * j + 1]; zij = zi - x[dim * j + 2]; r2 = xij * xij + yij * yij + zij * zij; if (dim == 4) { wij = wi - x[dim * j + 3]; r2 += wij * wij; } if ( r2 > rgbmaxpsmax2 ) continue; dij1i = 1.0 / sqrt(r2); dij = r2 * dij1i; sj = fs[j] * (rborn[j] - BOFFSET); sj2 = sj * sj; /* * ---following are from the Appendix of Schaefer and Froemmel, * JMB 216:1045-1066, 1990; Taylor series expansion for d>>s * is by Andreas Svrcek-Seiler; smooth rgbmax idea is from * Andreas Svrcek-Seiler and Alexey Onufrie. */ if( dij > rgbmax + sj ) continue; if ((dij > rgbmax - sj)) { uij = 1./(dij -sj); sumi -= 0.125 * dij1i * (1.0 + 2.0 * dij *uij + rgbmax2i * (r2 - 4.0 * rgbmax * dij - sj2) + 2.0 * log((dij-sj)*rgbmax1i)); } else if (dij >4.0*sj) { dij2i = dij1i*dij1i; tmpsd = sj2*dij2i; dumbo = TA+tmpsd*(TB+tmpsd*(TC+tmpsd*(TD+tmpsd*TDD))); sumi -= sj*tmpsd*dij2i*dumbo; } else if (dij > ri + sj) { sumi -= 0.5 * (sj / (r2 - sj2) + 0.5 * dij1i * log((dij - sj) / (dij + sj))); } else if (dij > fabs(ri - sj)) { theta = 0.5 * ri1i * dij1i * (r2 + ri * ri - sj2); uij = 1. / (dij + sj); sumi -= 0.25 * (ri1i * (2. - theta) - uij + dij1i * log(ri * uij)); } else if (ri < sj) { sumi -= 0.5 * (sj / (r2 - sj2) + 2. * ri1i + 0.5 * dij1i * log((sj - dij) / (sj + dij))); } if (gbsa == 1) { if ((P0[i] + P0[j]) > dij) { if (P0[i] > 2.5 && P0[j] > 2.5) { ineighbor[count++] = j + 1; } } } } if( gb==1 ) { /* "standard" (HCT) effective radii: */ reff[i] = 1.0/(ri1i + sumi); if( reff[i] < 0.0 ) reff[i] = 30.0; } else { /* "gbao" formulas: */ psi[i] = -ri*sumi; reff[i] = 1.0 / (ri1i - tanh( (gbalpha - gbbeta*psi[i] + gbgamma*psi[i]*psi[i]) *psi[i])/rborn[i] ); } if (gb_debug) printf("%d\t%15.7f\t%15.7f\n", i + 1, rborn[i], reff[i]); /* gbsa==1 */ ineighbor[count++] = 0; } } assert( count <= 30*prm->Natom ); /* * Main LCPO stuff follows. No parallelization using OpenMP due to * the loop-carried dependence of the 'count variable. */ esurf = 0.0; if (gbsa == 1) { totsasa = 0.0; count = 0; for (i = 0; i < prm->Natom; i++) { if (ineighbor[count] == 0) { count = count + 1; } else { si = PIx4 * P0[i] * P0[i]; sumAij = 0.0; sumAjk = 0.0; sumAjk2 = 0.0; sumAijAjk = 0.0; sumdAijddijdxi = 0.0; sumdAijddijdyi = 0.0; sumdAijddijdzi = 0.0; sumdAijddijdwi = 0.0; sumdAijddijdxiAjk = 0.0; sumdAijddijdyiAjk = 0.0; sumdAijddijdziAjk = 0.0; sumdAijddijdwiAjk = 0.0; icount = count; L70:j = ineighbor[count] - 1; /* mind the -1 , fortran again */ xi = x[dim * i ]; yi = x[dim * i + 1]; zi = x[dim * i + 2]; xj = x[dim * j ]; yj = x[dim * j + 1]; zj = x[dim * j + 2]; r2 = (xi - xj) * (xi - xj) + (yi - yj) * (yi - yj) + (zi - zj) * (zi - zj); if (dim == 4) { wi = x[dim * i + 3]; wj = x[dim * j + 3]; r2 += (wi - wj) * (wi - wj); } dij1i = 1. / sqrt(r2); rij = r2 * dij1i; tmpaij = P0[i] - rij * 0.5 - (P0[i] * P0[i] - P0[j] * P0[j]) * 0.5 * dij1i; Aij = PIx2 * P0[i] * tmpaij; dAijddij = PIx1 * P0[i] * (dij1i * dij1i * (P0[i] * P0[i] - P0[j] * P0[j]) - 1.0); dAijddijdxj = dAijddij * (xj - xi) * dij1i; dAijddijdyj = dAijddij * (yj - yi) * dij1i; dAijddijdzj = dAijddij * (zj - zi) * dij1i; if (dim == 4) { dAijddijdwj = dAijddij * (wj - wi) * dij1i; } sumAij = sumAij + Aij; count2 = icount; sumAjk2 = 0.0; sumdAjkddjkdxj = sumdAjkddjkdyj = sumdAjkddjkdzj = 0.0; if (dim == 4) { sumdAjkddjkdwj = 0.0; } p3p4Aij = -surften * (P3[i] + P4[i] * Aij); L80:k = ineighbor[count2] - 1; /*same as above, -1 ! */ if (j == k) goto L85; xk = x[dim * k ]; yk = x[dim * k + 1]; zk = x[dim * k + 2]; rjk2 = (xj - xk) * (xj - xk) + (yj - yk) * (yj - yk) + (zj - zk) * (zj - zk); if (dim == 4) { wk = x[dim * k + 3]; rjk2 += (wj - wk) * (wj - wk); } djk1i = 1.0 / sqrt(rjk2); rjk = rjk2 * djk1i; if (P0[j] + P0[k] > rjk) { vdw2dif = P0[j] * P0[j] - P0[k] * P0[k]; tmpajk = 2.0 * P0[j] - rjk - vdw2dif * djk1i; Ajk = PIx1 * P0[j] * tmpajk; sumAjk = sumAjk + Ajk; sumAjk2 = sumAjk2 + Ajk; dAjkddjk = PIx1 * P0[j] * djk1i * (djk1i * djk1i * vdw2dif - 1.0); dAjkddjkdxj = dAjkddjk * (xj - xk); dAjkddjkdyj = dAjkddjk * (yj - yk); dAjkddjkdzj = dAjkddjk * (zj - zk); f[dim * k ] = f[dim * k ] + dAjkddjkdxj * p3p4Aij; f[dim * k + 1] = f[dim * k + 1] + dAjkddjkdyj * p3p4Aij; f[dim * k + 2] = f[dim * k + 2] + dAjkddjkdzj * p3p4Aij; sumdAjkddjkdxj = sumdAjkddjkdxj + dAjkddjkdxj; sumdAjkddjkdyj = sumdAjkddjkdyj + dAjkddjkdyj; sumdAjkddjkdzj = sumdAjkddjkdzj + dAjkddjkdzj; if (dim == 4) { dAjkddjkdwj = dAjkddjk * (wj - wk); f[dim * k + 3] = f[dim * k + 3] + dAjkddjkdwj * p3p4Aij; sumdAjkddjkdwj = sumdAjkddjkdwj + dAjkddjkdwj; } } L85:count2 = count2 + 1; if (ineighbor[count2] != 0) { goto L80; } else { count2 = icount; } sumAijAjk = sumAijAjk + Aij * sumAjk2; sumdAijddijdxi = sumdAijddijdxi - dAijddijdxj; sumdAijddijdyi = sumdAijddijdyi - dAijddijdyj; sumdAijddijdzi = sumdAijddijdzi - dAijddijdzj; sumdAijddijdxiAjk = sumdAijddijdxiAjk - dAijddijdxj * sumAjk2; sumdAijddijdyiAjk = sumdAijddijdyiAjk - dAijddijdyj * sumAjk2; sumdAijddijdziAjk = sumdAijddijdziAjk - dAijddijdzj * sumAjk2; lastxj = dAijddijdxj * sumAjk2 + Aij * sumdAjkddjkdxj; lastyj = dAijddijdyj * sumAjk2 + Aij * sumdAjkddjkdyj; lastzj = dAijddijdzj * sumAjk2 + Aij * sumdAjkddjkdzj; dAidxj = surften * (P2[i] * dAijddijdxj + P3[i] * sumdAjkddjkdxj + P4[i] * lastxj); dAidyj = surften * (P2[i] * dAijddijdyj + P3[i] * sumdAjkddjkdyj + P4[i] * lastyj); dAidzj = surften * (P2[i] * dAijddijdzj + P3[i] * sumdAjkddjkdzj + P4[i] * lastzj); f[dim * j ] = f[dim * j ] + dAidxj; f[dim * j + 1] = f[dim * j + 1] + dAidyj; f[dim * j + 2] = f[dim * j + 2] + dAidzj; if (dim == 4) { sumdAijddijdwi = sumdAijddijdwi - dAijddijdwj; sumdAijddijdwiAjk = sumdAijddijdwiAjk - dAijddijdwj * sumAjk2; lastwj = dAijddijdwj * sumAjk2 + Aij * sumdAjkddjkdwj; dAidwj = surften * (P2[i] * dAijddijdwj + P3[i] * sumdAjkddjkdwj + P4[i] * lastwj); f[dim * j + 3] = f[dim * j + 3] + dAidwj; } count = count + 1; if (ineighbor[count] != 0) { goto L70; } else { count = count + 1; } Ai = P1[i] * si + P2[i] * sumAij + P3[i] * sumAjk + P4[i] * sumAijAjk; dAidxi = surften * (P2[i] * sumdAijddijdxi + P4[i] * sumdAijddijdxiAjk); dAidyi = surften * (P2[i] * sumdAijddijdyi + P4[i] * sumdAijddijdyiAjk); dAidzi = surften * (P2[i] * sumdAijddijdzi + P4[i] * sumdAijddijdziAjk); f[dim * i ] = f[dim * i ] + dAidxi; f[dim * i + 1] = f[dim * i + 1] + dAidyi; f[dim * i + 2] = f[dim * i + 2] + dAidzi; if (dim == 4) { dAidwi = surften*(P2[i] * sumdAijddijdwi + P4[i] * sumdAijddijdwiAjk); f[dim * i + 3] = f[dim * i + 3] + dAidwi; } totsasa = totsasa + Ai; } } /* printf("SASA %f , ESURF %f \n",totsasa,totsasa*surften); */ esurf = totsasa * surften; } /* Compute the GB, Coulomb and Lennard-Jones energies and derivatives. */ epol = elec = evdw = 0.0; #pragma omp parallel reduction (+: epol, elec, evdw) \ private (i, i34, ri, qi, qj, expmkf, dielfac, qi2h, qid2h, iaci, \ xi, yi, zi, wi, k, j, j34, xij, yij, zij, wij, r2, qiqj, \ rj, rb2, efac, fgbi, fgbk, temp4, temp6, eel, de, temp5, \ rinv, r2inv, ic, r6inv, f6, f12, dedx, dedy, dedz, dedw, \ iexw, xj, yj, zj, wj, threadnum, numthreads, foff, soff, \ sumda, thi, ri1i, dij1i, datmp, daix, daiy, daiz, daiw, \ dij2i, dij, sj, sj2, temp1, dij3i, tmpsd, dumbo, npairs) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute offsets into the gradient and sumdeijda arrays for this * thread, but only if OPENMP is defined. */ #ifdef OPENMP soff = prm->Natom * threadnum; foff = dim * soff; #else soff = 0; foff = 0; #endif /* * Initialize the sumdeijda array inside of the parallel region. * * For OpenMP, the "first touch" memory allocation strategy will * locate the copy of the sumdeijda array that is initialized by * a particular CPU in memory that is local to that CPU. * * It is not necessary to synchronize OpenMP threads following * this loop because each thread initializes the particular copy * of the sumdeijda array that it subsequently updates. A similar * argument applies for MPI tasks. */ for (i = 0; i < prm->Natom; i++) { sumdeijda[soff + i] = 0.0; } /* * Allocate and initialize the iexw array used for skipping excluded * atoms. Note that because of the manner in which iexw is used, it * is necessary to initialize it before only the first iteration of * the following loop. */ iexw = ivector(-1, prm->Natom); for (i = -1; i < prm->Natom; i++) { iexw[i] = -1; } /* * Loop over all atoms i. * * Explicitly assign threads to loop indices for the following loop, * in a manner equivalent to (static, N) scheduling with OpenMP, and * identical to the manner in which threads are assigned in nblist. * * Synchronization of OpenMP threads will occur following this loop * because the parallel region ends after this loop. Following * synchronization, a reduction of the sumdeijda array will be * performed. * * Synchronization of MPI tasks will occur via the MPI_Reduce function. */ for (i = 0; i < prm->Natom; i++) { #if defined(OPENMP) || defined(MPI) || defined(SCALAPACK) if ( !myroc(i, BLOCKSIZE, numthreads, threadnum) ) continue; #endif ri = reff[i]; qi = q[i]; /* * If atom i is not frozen, compute the "diagonal" energy that * is a function of only the effective radius Ri but not of the * interatomic distance Dij. Compute also the contribution of * the diagonal energy term to the sum by which the derivative * of Ri will be multiplied. */ if (!frozen[i]) { expmkf = exp(-KSCALE * (*kappa) * ri) / (*diel_ext); dielfac = 1.0 - expmkf; qi2h = 0.5 * qi * qi; qid2h = qi2h * dielfac; epol += -qid2h / ri; sumdeijda[soff + i] += qid2h - KSCALE * (*kappa) * qi2h * expmkf *ri; } /* * Skip the pair calculations if there are no atoms j on the * pair list of atom i. */ npairs = upears[i]; if ( !npairs ) continue; i34 = dim * i; xi = x[i34 ]; yi = x[i34 + 1]; zi = x[i34 + 2]; if (dim == 4) { wi = x[i34 + 3]; } iaci = prm->Ntypes * (prm->Iac[i] - 1); /* * Expand the excluded atom list into the iexw array by storing i * at array address j. */ for (j = 0; j < prm->Iblo[i]; j++) { iexw[IexclAt[i][j] - 1] = i; } /* Initialize the derivative accumulators. */ daix = daiy = daiz = daiw = 0.0; /* Select atoms j from the pair list. Non-graceful error handling. */ for (k = lpears[i]; k < lpears[i] + npairs; k++) { if (pearlist[i] == NULL) { printf("NULL pair list entry in egb loop 3, taskid = %d\n", mytaskid); fflush(stdout); } j = pearlist[i][k]; j34 = dim * j; /* Continue computing the non-diagonal energy term. */ xij = xi - x[j34 ]; yij = yi - x[j34 + 1]; zij = zi - x[j34 + 2]; r2 = xij * xij + yij * yij + zij * zij; if (dim == 4) { wij = wi - x[j34 + 3]; r2 += wij * wij; } /* * Because index j is retrieved from the pairlist array it is * not constrained to a particular range of values; therefore, * the threads that have loaded the reff array must be * synchronized prior to the use of reff below. */ qiqj = qi * q[j]; rj = reff[j]; rb2 = ri * rj; efac = exp(-r2 / (4.0 * rb2)); fgbi = 1.0 / sqrt(r2 + rb2 * efac); fgbk = -(*kappa) * KSCALE / fgbi; expmkf = exp(fgbk) / (*diel_ext); dielfac = 1.0 - expmkf; epol += -qiqj * dielfac * fgbi; temp4 = fgbi * fgbi * fgbi; temp6 = qiqj * temp4 * (dielfac + fgbk * expmkf); de = temp6 * (1.0 - 0.25 * efac); temp5 = 0.5 * efac * temp6 * (rb2 + 0.25 * r2); /* * Compute the contribution of the non-diagonal energy term to the * sum by which the derivatives of Ri and Rj will be multiplied. */ sumdeijda[soff + i] += ri * temp5; sumdeijda[soff + j] += rj * temp5; /* * Compute the Van der Waals and Coulombic energies for only * those pairs that are not on the excluded atom list. Any * pair on the excluded atom list will have atom i stored at * address j of the iexw array. It is not necessary to reset * the elements of the iexw array to -1 between successive * iterations in i because an i,j pair is uniquely identified * by atom i stored at array address j. Thus for example, the * i+1,j pair would be stored at the same address as the i,j * pair but after the i,j pair were used. */ if (iexw[j] != i) { rinv = 1. / sqrt(r2); r2inv = rinv * rinv; /* gas-phase Coulomb energy: */ eel = qiqj * rinv; elec += eel; de -= eel * r2inv; /* Lennard-Jones energy: */ ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6inv = r2inv * r2inv * r2inv; f6 = prm->Cn2[ic] * r6inv; f12 = prm->Cn1[ic] * r6inv * r6inv; evdw += f12 - f6; de -= (12. * f12 - 6. * f6) * r2inv; } } /* * Sum to the gradient vector the derivatives of Dij that are * computed relative to the cartesian coordinates of atoms i and j. */ dedx = de * xij; dedy = de * yij; dedz = de * zij; daix += dedx; daiy += dedy; daiz += dedz; f[foff + j34 ] -= dedx; f[foff + j34 + 1] -= dedy; f[foff + j34 + 2] -= dedz; if (dim == 4) { dedw = de * wij; daiw += dedw; f[foff + j34 + 3] -= dedw; } } /* Update the i elements of the gradient and the sumdeijda array. */ f[foff + i34 ] += daix; f[foff + i34 + 1] += daiy; f[foff + i34 + 2] += daiz; if (dim == 4) { f[foff + i34 + 3] += daiw; } } /* Free the iexw array within this potentially parallel region of code. */ free_ivector(iexw, -1, prm->Natom); } /* Perform a reduction of sumdeijda if MPI or SCALAPACK is defined. */ #if defined(MPI) || defined(SCALAPACK) ierror = MPI_Allreduce(sumdeijda, reductarr, prm->Natom, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (ierror != MPI_SUCCESS) { printf("Error in egb sumdeijda reduction, error = %d mytaskid = %d\n", ierror, mytaskid); } for (i = 0; i < prm->Natom; i++) { sumdeijda[i] = reductarr[i]; } #endif /* * Perform a reduction of sumdeijda if OPENMP is defined. * In the following, the (j,i) loop nesting is more efficient * than (i,j) loop nesting. */ #ifdef OPENMP #pragma omp parallel for private (i) schedule(static, 8) for (j = 0; j < prm->Natom; j++) { for (i = 1; i < maxthreads; i++) { sumdeijda[j] += sumdeijda[prm->Natom * i + j]; } } #endif #pragma omp parallel \ private (i, i34, ri, qi, qj, expmkf, dielfac, qi2h, qid2h, iaci, \ xi, yi, zi, wi, k, j, j34, xij, yij, zij, wij, r2, qiqj, \ rj, rb2, efac, fgbi, fgbk, temp4, temp6, eel, de, temp5, \ rinv, r2inv, ic, r6inv, f6, f12, dedx, dedy, dedz, dedw, \ iexw, xj, yj, zj, wj, threadnum, numthreads, foff, \ sumda, thi, ri1i, dij1i, datmp, daix, daiy, daiz, daiw, \ dij2i, dij, sj, sj2, temp1, dij3i, tmpsd, dumbo, npairs) { /* * Get the thread number and the number of threads for multi-threaded * execution under OpenMP. For all other cases, including ScaLAPACK, * MPI and single-threaded execution, use the values that have been * stored in mytaskid and numtasks, respectively. */ #if defined(OPENMP) threadnum = omp_get_thread_num(); numthreads = omp_get_num_threads(); #else threadnum = mytaskid; numthreads = numtasks; #endif /* * Compute an offset into the gradient array for this thread, * but only if OPENMP is defined. There is no need to * compute an offset into the sumdeijda array because all * copies of this array have been reduced into copy zero. */ #ifdef OPENMP foff = prm->Natom * dim * threadnum; #else foff = 0; #endif /* * Compute the derivatives of the effective radius Ri of atom i * with respect to the cartesian coordinates of each atom j. Sum * all of these derivatives into the gradient vector. * * Loop over all atoms i. * * Synchronization of OpenMP threads will occur following this loop * because the parallel region ends after this loop. A reduction * of the gradient array will occur in the mme34 function, either * for OpenMP or MPI. This reduction will synchronize the MPI * tasks, so an explicit barrier is not necessary at the end of * this loop. * * Explicitly assign threads to loop indices for the following loop, * in a manner equivalent to (static, N) scheduling with OpenMP, and * identical to the manner in which threads are assigned in nblist. */ for (i = 0; i < prm->Natom; i++) { #if defined(OPENMP) || defined(MPI) || defined(SCALAPACK) if ( !myroc(i, BLOCKSIZE, numthreads, threadnum) ) continue; #endif /* * Don't calculate derivatives of the effective radius of atom i * if atom i is frozen or if there are no pair atoms j associated * with atom i. */ npairs = lpears[i] + upears[i]; if ( frozen[i] || !npairs ) continue; i34 = dim * i; xi = x[i34 ]; yi = x[i34 + 1]; zi = x[i34 + 2]; if (dim == 4) { wi = x[i34 + 3]; } ri = rborn[i] - BOFFSET; ri1i = 1. / ri; sumda = sumdeijda[i]; if ( gb>1 ) { ri = rborn[i] - BOFFSET; thi = tanh( (gbalpha - gbbeta*psi[i] + gbgamma*psi[i]*psi[i])*psi[i] ); sumda *= (gbalpha -2.0*gbbeta*psi[i] + 3.0*gbgamma*psi[i]*psi[i]) *(1.0 - thi*thi)*ri/rborn[i]; } /* Initialize the derivative accumulators. */ daix = daiy = daiz = daiw = 0.0; /* Select atom j from the pair list. Non-graceful error handling. */ for (k = 0; k < npairs; k++) { if (pearlist[i] == NULL) { printf("NULL pair list entry in egb loop 4, taskid = %d\n", mytaskid); fflush(stdout); } j = pearlist[i][k]; j34 = dim * j; xij = xi - x[j34 ]; yij = yi - x[j34 + 1]; zij = zi - x[j34 + 2]; r2 = xij * xij + yij * yij + zij * zij; if (dim == 4) { wij = wi - x[j34 + 3]; r2 += wij * wij; } if ( r2 > rgbmaxpsmax2 ) continue; dij1i = 1.0 / sqrt(r2); dij2i = dij1i * dij1i; dij = r2 * dij1i; sj = fs[j] * (rborn[j] - BOFFSET); sj2 = sj * sj; /* * The following are the numerator of the first derivatives of the * effective radius Ri with respect to the interatomic distance Dij. * They are derived from the equations from the Appendix of Schaefer * and Froemmel as well as from the Taylor series expansion for d>>s * by Andreas Svrcek-Seiler. The smooth rgbmax idea is from Andreas * Svrcek-Seiler and Alexey Onufriev. The complete derivative is * formed by multiplying the numerator by -Ri*Ri. The factor of Ri*Ri * has been moved to the terms that are multiplied by the derivative. * The negation is deferred until later. When the chain rule is used * to form the first derivatives of the effective radius with respect * to the cartesian coordinates, an additional factor of Dij appears * in the denominator. That factor is included in the following * expressions. */ if ( dij > rgbmax + sj ) continue; if( dij > rgbmax - sj ){ temp1 = 1./(dij-sj); dij3i = dij1i * dij2i; datmp = 0.125 * dij3i * ((r2 + sj2) * (temp1*temp1 - rgbmax2i) - 2.0 * log(rgbmax*temp1)); } else if(dij >4.0*sj) { tmpsd = sj2*dij2i; dumbo = TE + tmpsd*(TF+tmpsd*(TG+tmpsd*(TH+tmpsd*THH))); datmp = tmpsd*sj*dij2i*dij2i*dumbo; } else if (dij > ri + sj) { temp1 = 1. / (r2 - sj2); datmp = temp1 * sj * (-0.5 * dij2i + temp1) + 0.25 * dij1i * dij2i * log((dij - sj) / (dij + sj)); } else if (dij > fabs(ri - sj)) { temp1 = 1. / (dij + sj); dij3i = dij1i * dij1i * dij1i; datmp = -0.25 * (-0.5 * (r2 - ri * ri + sj2) * dij3i * ri1i * ri1i + dij1i * temp1 * (temp1 - dij1i) - dij3i * log(ri * temp1)); } else if (ri < sj) { temp1 = 1. / (r2 - sj2); datmp = -0.5 * (sj * dij2i * temp1 - 2. * sj * temp1 * temp1 - 0.5 * dij2i * dij1i * log((sj - dij) / (sj + dij))); } else { datmp = 0.; } /* Sum the derivatives into daix, daiy and daiz. */ daix += xij * datmp; daiy += yij * datmp; daiz += zij * datmp; /* * Sum the derivatives relative to atom j (weighted by -sumdeijda[i]) * into the gradient vector. For example, f[j34 + 2] contains the * derivatives of Ri with respect to the z-coordinate of atom j. */ datmp *= sumda; f[foff + j34 ] += xij * datmp; f[foff + j34 + 1] += yij * datmp; f[foff + j34 + 2] += zij * datmp; if (dim == 4) { daiw += wij * datmp; f[foff + j34 + 3] += wij * datmp; } } /* * Update the gradient vector with the sums of derivatives of the * effective radius Ri with respect to the cartesian coordinates. * For example, f[i34 + 1] contains the sum of derivatives of Ri * with respect to the y-coordinate of each atom. Multiply by * -sumdeijda[i] here (instead of merely using datmp multiplied by * -sumdeijda) in order to distribute the product across the sum of * derivatives in an attempt to obtain greater numeric stability. */ f[foff + i34 ] -= sumda * daix; f[foff + i34 + 1] -= sumda * daiy; f[foff + i34 + 2] -= sumda * daiz; if (dim == 4) { f[foff + i34 +3] -= sumda * daiw; } } } /* Return elec and evdw through the reference parameters eelt and enb. */ *eelt = elec; *enb = evdw; /* Free the static arrays if STATIC_ARRAYS is undefined. */ #ifndef STATIC_ARRAYS if (reff != NULL) free_vector(reff, 0, n); reff = NULL; if (sumdeijda != NULL) free_vector(sumdeijda, 0, maxthreads*n); sumdeijda = NULL; if (psi != NULL) free_vector(psi, 0, n); psi = NULL; #if defined(MPI) || defined(SCALAPACK) if (reductarr != NULL) free_vector(reductarr, 0, n); reductarr = NULL; #endif #endif return (epol); } /*********************************************************************** MME34() ************************************************************************/ /* * Here is the mme function for 3D or 4D, depending upon the dim variable. * * Calling parameters are as follows: * * x - input: the atomic (x,y,z) coordinates * f - updated: the gradient vector * iter - the iteration counter, which if negative selects the following: * -1 print some energy values * -3 call egb to deallocate static arrays, then deallocate grad * -(any other value) normal execution */ static REAL_T mme34( REAL_T *x, REAL_T *f, int *iter ) { extern REAL_T tconjgrad; REAL_T ebh, eba, eth, eta, eph, epa, enb, eel, enb14, eel14, ecn, e_gb, frms; REAL_T ene[12]; REAL_T t1, t2; static REAL_T *grad=NULL; int i, j, k, goff, threadnum, numthreads, maxthreads; int dummy = 0; size_t n; #if defined(MPI) || defined(SCALAPACK) int ierror; REAL_T reductarr[12]; #endif t1 = seconds(); n = (size_t)prm->Natom; /* * If OPENMP is defined, set maxthreads to the maximum number of * OpenMP threads; otherwise, set maxthreads to one. */ #ifdef OPENMP maxthreads = omp_get_max_threads(); #else maxthreads = 1; #endif /* * If the iteration count equals -3, call egb to deallocate the * static arrays, deallocate the gradient array, then return; * otherwise, simply return. */ if (*iter == -3) { egb(lpairs, upairs, pairlist, x, grad, prm->Fs, prm->Rborn, prm->Charges, &kappa, &epsext, &enb, &eel, 1); if (grad != NULL) free_vector(grad, 0, maxthreads * dim * n); grad = NULL; return (0.0); } /* If the iteration count equals 0, print the header for task 0 only. */ if (*iter == 0 && mytaskid == 0) { printf (" iter Total bad vdW elect. cons. genBorn frms\n"); fflush(stdout); } /* If the iteration count equals 0, initialize the timing variables. */ if (*iter == 0) { tnonb = tpair = tbond = tangl = tphi = tborn = tcons = tmme = 0.0; tconjgrad = tmd = 0.0; } /* * Write the checkpoint file every nchk iteration if the chknm * variable is non-NULL, but for task 0 only. */ if (chknm != NULL && (*iter > 0 && *iter % nchk == 0) && mytaskid == 0) { checkpoint(chknm, prm->Natom, x, *iter); } /* * Build the pair list if it hasn't already been built; rebuild * it every nsnb iteration. */ if (nb_pairs < 0 || (*iter > 0 && *iter % nsnb == 0)) { nb_pairs = nblist(lpairs, upairs, pairlist, x, dummy, 1); t2 = seconds(); tpair += t2 - t1; t1 = t2; } /* * If OPENMP is defined, allocate a gradient vector for each thread, * and let each thread initialize its gradient vector so that the * "first touch" strategy will allocate local memory. If OpenMP * is not defined, allocate one gradient vector. * * Note: the following allocations assume that the dimensionality * of the problem does not change during one invocation of NAB. * If, for example, mme34 were called with dim==3 and then with dim==4, * these allocations would not be repeated for the larger value * of n that would be necessitated by dim==4. */ if (grad == NULL) { grad = vector(0, maxthreads * dim * n); } #ifdef OPENMP #pragma omp parallel private (i, goff) { goff = dim * n * omp_get_thread_num(); for (i = 0; i < dim * prm->Natom; i++) { grad[goff + i] = 0.0; } } #else for (i = 0; i < dim * prm->Natom; i++) { grad[i] = 0.0; } #endif t2 = seconds(); tmme += t2 - t1; t1 = t2; ebh = ebond(prm->Nbonh, prm->BondHAt1, prm->BondHAt2, prm->BondHNum, prm->Rk, prm->Req, x, grad); eba = ebond(prm->Mbona, prm->BondAt1, prm->BondAt2, prm->BondNum, prm->Rk, prm->Req, x, grad); ene[3] = ebh + eba; t2 = seconds(); tbond += t2 - t1; t1 = t2; eth = eangl(prm->Ntheth, prm->AngleHAt1, prm->AngleHAt2, prm->AngleHAt3, prm->AngleHNum, prm->Tk, prm->Teq, x, grad); eta = eangl(prm->Ntheta, prm->AngleAt1, prm->AngleAt2, prm->AngleAt3, prm->AngleNum, prm->Tk, prm->Teq, x, grad); ene[4] = eth + eta; t2 = seconds(); tangl += t2 - t1; t1 = t2; eph = ephi(prm->Nphih, prm->DihHAt1, prm->DihHAt2, prm->DihHAt3, prm->DihHAt4, prm->DihHNum, prm->Pk, prm->Pn, prm->Phase, x, grad); epa = ephi(prm->Mphia, prm->DihAt1, prm->DihAt2, prm->DihAt3, prm->DihAt4, prm->DihNum, prm->Pk, prm->Pn, prm->Phase, x, grad); ene[5] = eph + epa; ene[6] = 0.0; /* hbond term not in Amber-94 force field */ t2 = seconds(); tphi += t2 - t1; t1 = t2; /* In the following lpairs is a dummy argument that is not used. */ nbond(lpairs, prm->N14pairs, N14pearlist, 1, x, grad, &enb14, &eel14, scnb, scee); ene[7] = enb14; ene[8] = eel14; t2 = seconds(); tnonb += t2 - t1; t1 = t2; if (e_debug) { EXPR("%9.3f", enb14); EXPR("%9.3f", eel14); } if (nconstrained) { ecn = econs(x, grad); t2 = seconds(); tcons += t2 - t1; t1 = t2; } else ecn = 0.0; ene[9] = ecn; if (gb) { e_gb = egb(lpairs, upairs, pairlist, x, grad, prm->Fs, prm->Rborn, prm->Charges, &kappa, &epsext, &enb, &eel, 0); t2 = seconds(); tborn += t2 - t1; t1 = t2; ene[1] = enb; ene[2] = eel; ene[10] = e_gb; ene[11] = esurf; if (e_debug) { EXPR("%9.3f", enb); EXPR("%9.3f", eel); EXPR("%9.3f", e_gb); EXPR("%9.3f", esurf); } } else { nbond(lpairs, upairs, pairlist, 0, x, grad, &enb, &eel, 1.0, 1.0); t2 = seconds(); tnonb += t2 - t1; t1 = t2; ene[1] = enb; ene[2] = eel; ene[10] = 0.0; ene[11] = 0.0; if (e_debug) { EXPR("%9.3f", enb); EXPR("%9.3f", eel); } } /* * Perform a reduction over the gradient vector if OPENMP or MPI * is defined. * * If OPENMP is defined, the reduction is performed by the following loop. * The (j,i) loop nesting is more efficient than (i,j) nesting. * * If MPI is defined, the reduction is performed by MPI_Reduce. */ #if defined(OPENMP) for (i = 0; i < dim * prm->Natom; i++) { f[i] = grad[i]; } goff = dim * prm->Natom; #pragma omp parallel for private (i, j) for (j = 0; j < dim * prm->Natom; j++) { for (i = 1; i < maxthreads; i++) { f[j] += grad[goff * i + j]; } } #elif defined(MPI) || defined(SCALAPACK) ierror = MPI_Allreduce(grad, f, dim * prm->Natom, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (ierror != MPI_SUCCESS) { printf("Error in mme34 grad reduction, error = %d mytaskid = %d\n", ierror, mytaskid); } #else for (i = 0; i < dim * prm->Natom; i++) { f[i] = grad[i]; } #endif ene[0] = 0.0; for (k = 1; k <= 11; k++) ene[0] += ene[k]; for (k = 0; k < prm->Natom; k++) { /* zero out frozen forces */ if (frozen[k]) { f[dim * k + 0] = f[dim * k + 1] = f[dim * k + 2] = 0.0; if (dim == 4) { f[dim * k + 3] = 0.0; } } } #ifdef PRINT_DERIV k = 0; for (i = 0; i < 105; i++) { k++; printf("%10.5f", f[i]); if (k % 8 == 0) printf("\n"); } printf("\n"); #endif frms = 0.0; for (i = 0; i < dim * prm->Natom; i++) frms += f[i] * f[i]; frms = sqrt(frms / (dim * prm->Natom)); /* If MPI is defined perform a reduction of the ene array. */ #if defined(MPI) || defined(SCALAPACK) ierror = MPI_Allreduce(ene, reductarr, 12, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (ierror != MPI_SUCCESS) { printf("Error in mme34 ene reduction, error = %d mytaskid = %d\n", ierror, mytaskid); } for (i = 0; i < 12; i++) { ene[i] = reductarr[i]; } #endif /* * Print the energies and rms gradient but only for task zero, * and only for positive values of the iteration counter. */ if (mytaskid == 0) { if (*iter > -1 && (*iter == 0 || *iter % ntpr == 0)) { printf("ff:%4d %9.2f %9.2f %9.2f %9.2f %9.2f %9.2f %9.2e\n", *iter, ene[0], ene[3] + ene[4] + ene[5], ene[1] + ene[7], ene[2] + ene[8], ene[9]+ene[11], ene[10], frms); fflush(stdout); } } /* A value of -1 for the iteration counter is reserved for printing. */ if( *iter == -1 ){ printf( " bond: %15.9f\n", ene[3] ); printf( " angle: %15.9f\n", ene[4] ); printf( " dihedral: %15.9f\n", ene[5] ); printf( " enb14: %15.9f\n", ene[7] ); printf( " eel14: %15.9f\n", ene[8] ); printf( " enb: %15.9f\n", ene[1] ); printf( " eel: %15.9f\n", ene[2] ); printf( " egb: %15.9f\n", ene[10] ); printf( " econs: %15.9f\n", ene[9] ); printf( " esurf: %15.9f\n", ene[11] ); printf( " Total: %15.9f\n", ene[0] ); } /* If STATIC_ARRAYS is not defined, deallocate the gradient array. */ #ifndef STATIC_ARRAYS if (grad != NULL) free_vector(grad, 0, maxthreads * dim * n); grad = NULL; #endif t2 = seconds(); tmme += t2 - t1; return (ene[0]); } /*********************************************************************** MME_TIMER() ************************************************************************/ /* Print a timing summary but only for task zero. */ int mme_timer( void ) { /* Use the maximum time from all MPI tasks or SCALAPACK processes. */ #if defined(MPI) || defined(SCALAPACK) REAL_T timarr[10], reductarr[10]; timarr[0] = tcons; timarr[1] = tbond; timarr[2] = tangl; timarr[3] = tphi; timarr[4] = tpair; timarr[5] = tnonb; timarr[6] = tborn; timarr[7] = tmme; timarr[8] = tconjgrad; timarr[9] = tmd; MPI_Allreduce(timarr, reductarr, 10, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); tcons = reductarr[0]; tbond = reductarr[1]; tangl = reductarr[2]; tphi = reductarr[3]; tpair = reductarr[4]; tnonb = reductarr[5]; tborn = reductarr[6]; tmme = reductarr[7]; tconjgrad = reductarr[8]; tmd = reductarr[9]; #endif if (mytaskid == 0) { printf("\nFirst derivative timing summary:\n"); printf(" constraints %10.2f\n", tcons); printf(" bonds %10.2f\n", tbond); printf(" angles %10.2f\n", tangl); printf(" torsions %10.2f\n", tphi); printf(" pairlist %10.2f\n", tpair); printf(" nonbonds %10.2f\n", tnonb); printf(" gen. Born %10.2f\n", tborn); printf(" mme %10.2f\n", tmme); printf(" Total %10.2f\n\n", tcons + tbond + tangl + tphi + tpair + tnonb + tborn + tmme); printf(" conj. grad. %10.2f\n", tconjgrad); printf(" molec. dyn. %10.2f\n\n", tmd); fflush(stdout); } return (0); }