/* * Energy subroutines, to be used in sff.c * * Parallelization via OpenMP and creation of pair lists using a kd tree * were added by Russ Brown (russ.brown@sun.com) */ #undef KDTREE_DEBUG REAL_T econs( REAL_T *x, REAL_T *f ) { int i; REAL_T e_cons, rx, ry, rz; e_cons = 0.0; for (i = 0; i < prm->Natom; i++) { if (constrained[i]) { rx = x[3 * i] - x0[3 * i]; ry = x[3 * i + 1] - x0[3 * i + 1]; rz = x[3 * i + 2] - x0[3 * i + 2]; e_cons += wcons * (rx * rx + ry * ry + rz * rz); f[3 * i] += 2. * wcons * rx; f[3 * i + 1] += 2. * wcons * ry; f[3 * i + 2] += 2. * wcons * rz; } } return (e_cons); } REAL_T ebond( int nbond, int *a1, int *a2, int *atype, REAL_T *Rk, REAL_T *Req, REAL_T *x, REAL_T *f ) { int i, at1, at2, atyp; REAL_T e_bond, r, rx, ry, rz, r2, s, db, df, e; e_bond = 0.0; for (i = 0; i < nbond; i++) { at1 = a1[i]; at2 = a2[i]; atyp = atype[i] - 1; rx = x[at1] - x[at2]; ry = x[at1 + 1] - x[at2 + 1]; rz = x[at1 + 2] - x[at2 + 2]; r2 = rx * rx + ry * ry + rz * rz; s = sqrt(r2); r = 2.0 / s; db = s - Req[atyp]; df = Rk[atyp] * db; e = df * db; e_bond += e; df *= r; f[at1 + 0] += rx * df; f[at1 + 1] += ry * df; f[at1 + 2] += rz * df; f[at2 + 0] -= rx * df; f[at2 + 1] -= ry * df; f[at2 + 2] -= rz * df; } if (e_debug) EXPR("%9.3f", e_bond); return (e_bond); } REAL_T eangl( int nang, int *a1, int *a2, int *a3, int *atype, REAL_T *Tk, REAL_T *Teq, REAL_T *x, REAL_T *f ) { int i, atyp, at1, at2, at3; REAL_T dxi, dyi, dzi, dxj, dyj, dzj, ri2, rj2, ri, rj, rir, rjr; REAL_T dxir, dyir, dzir, dxjr, dyjr, dzjr, cst, at, da, df, e, e_theta; REAL_T xtmp, dxtmp, ytmp, dytmp, ztmp, dztmp; e_theta = 0.0; for (i = 0; i < nang; i++) { at1 = a1[i]; at2 = a2[i]; at3 = a3[i]; atyp = atype[i] - 1; dxi = x[at1] - x[at2]; dyi = x[at1 + 1] - x[at2 + 1]; dzi = x[at1 + 2] - x[at2 + 2]; dxj = x[at3] - x[at2]; dyj = x[at3 + 1] - x[at2 + 1]; dzj = x[at3 + 2] - x[at2 + 2]; ri2 = dxi * dxi + dyi * dyi + dzi * dzi; rj2 = dxj * dxj + dyj * dyj + dzj * dzj; ri = sqrt(ri2); rj = sqrt(rj2); rir = 1. / ri; rjr = 1. / rj; dxir = dxi * rir; dyir = dyi * rir; dzir = dzi * rir; dxjr = dxj * rjr; dyjr = dyj * rjr; dzjr = dzj * rjr; cst = dxir * dxjr + dyir * dyjr + dzir * dzjr; if (cst > 1.0) cst = 1.0; if (cst < -1.0) cst = -1.0; at = acos(cst); da = at - Teq[atyp]; df = da * Tk[atyp]; e = df * da; e_theta = e_theta + e; df = df + df; at = sin(at); if (at > 0 && at < 1.e-3) at = 1.e-3; else if (at < 0 && at > -1.e-3) at = -1.e-3; df = -df / at; xtmp = df * rir * (dxjr - cst * dxir); dxtmp = df * rjr * (dxir - cst * dxjr); ytmp = df * rir * (dyjr - cst * dyir); dytmp = df * rjr * (dyir - cst * dyjr); ztmp = df * rir * (dzjr - cst * dzir); dztmp = df * rjr * (dzir - cst * dzjr); f[at1 + 0] += xtmp; f[at3 + 0] += dxtmp; f[at2 + 0] -= xtmp + dxtmp; f[at1 + 1] += ytmp; f[at3 + 1] += dytmp; f[at2 + 1] -= ytmp + dytmp; f[at1 + 2] += ztmp; f[at3 + 2] += dztmp; f[at2 + 2] -= ztmp + dztmp; } if (e_debug) EXPR("%9.3f", e_theta); return (e_theta); } REAL_T ephi( int nphi, int *a1, int *a2, int *a3, int *a4, int *atype, REAL_T *Pk, REAL_T *Pn, REAL_T *Phase, REAL_T *x, REAL_T *f ) { REAL_T e, co, den, co1, uu, vv, uv, ax, bx, cx, ay, by, cy, az, bz, cz; REAL_T a0x, b0x, c0x, a0y, b0y, c0y, a0z, b0z, c0z, a1x, b1x; REAL_T a1y, b1y, a1z, b1z, a2x, b2x, a2y, b2y, a2z, b2z; REAL_T dd1x, dd2x, dd3x, dd4x, dd1y, dd2y, dd3y, dd4y, dd1z, dd2z, dd3z, dd4z; REAL_T df, aa, bb, cc, ab, bc, ac, ktot, cosq; REAL_T ktors, phase, e_tors; int i, at1, at2, at3, at4, atyp; REAL_T ux, uy, uz, vx, vy, vz, delta, phi, dx1, dy1, dz1, yy, pi; pi = 3.1415927; e_tors = 0.0; for (i = 0; i < nphi; i++) { at1 = a1[i]; at2 = a2[i]; at3 = abs(a3[i]); at4 = abs(a4[i]); atyp = atype[i] - 1; ax = x[at2 + 0] - x[at1 + 0]; ay = x[at2 + 1] - x[at1 + 1]; az = x[at2 + 2] - x[at1 + 2]; bx = x[at3 + 0] - x[at2 + 0]; by = x[at3 + 1] - x[at2 + 1]; bz = x[at3 + 2] - x[at2 + 2]; cx = x[at4 + 0] - x[at3 + 0]; cy = x[at4 + 1] - x[at3 + 1]; cz = x[at4 + 2] - x[at3 + 2]; # define DOT(a,b,c,d,e,f) a*d + b*e + c*f ab = DOT(ax, ay, az, bx, by, bz); bc = DOT(bx, by, bz, cx, cy, cz); ac = DOT(ax, ay, az, cx, cy, cz); aa = DOT(ax, ay, az, ax, ay, az); bb = DOT(bx, by, bz, bx, by, bz); cc = DOT(cx, cy, cz, cx, cy, cz); uu = (aa * bb) - (ab * ab); vv = (bb * cc) - (bc * bc); uv = (ab * bc) - (ac * bb); den = 1.0 / sqrt(fabs(uu * vv)); co = uv * den; co1 = 0.5 * co * den; a0x = -bc * bx + bb * cx; a0y = -bc * by + bb * cy; a0z = -bc * bz + bb * cz; b0x = ab * cx + bc * ax - 2. * ac * bx; b0y = ab * cy + bc * ay - 2. * ac * by; b0z = ab * cz + bc * az - 2. * ac * bz; c0x = ab * bx - bb * ax; c0y = ab * by - bb * ay; c0z = ab * bz - bb * az; a1x = 2. * uu * (-cc * bx + bc * cx); a1y = 2. * uu * (-cc * by + bc * cy); a1z = 2. * uu * (-cc * bz + bc * cz); b1x = 2. * uu * (bb * cx - bc * bx); b1y = 2. * uu * (bb * cy - bc * by); b1z = 2. * uu * (bb * cz - bc * bz); a2x = -2. * vv * (bb * ax - ab * bx); a2y = -2. * vv * (bb * ay - ab * by); a2z = -2. * vv * (bb * az - ab * bz); b2x = 2. * vv * (aa * bx - ab * ax); b2y = 2. * vv * (aa * by - ab * ay); b2z = 2. * vv * (aa * bz - ab * az); dd1x = (a0x - a2x * co1) * den; dd1y = (a0y - a2y * co1) * den; dd1z = (a0z - a2z * co1) * den; dd2x = (-a0x - b0x - (a1x - a2x - b2x) * co1) * den; dd2y = (-a0y - b0y - (a1y - a2y - b2y) * co1) * den; dd2z = (-a0z - b0z - (a1z - a2z - b2z) * co1) * den; dd3x = (b0x - c0x - (-a1x - b1x + b2x) * co1) * den; dd3y = (b0y - c0y - (-a1y - b1y + b2y) * co1) * den; dd3z = (b0z - c0z - (-a1z - b1z + b2z) * co1) * den; dd4x = (c0x - b1x * co1) * den; dd4y = (c0y - b1y * co1) * den; dd4z = (c0z - b1z * co1) * den; if (prm->Nhparm && a3[i] < 0) { /* here we will use a quadratic form for the improper torsion */ co = co > 1.0 ? 1.0 : co; co = co < -1.0 ? -1.0 : co; phi = acos(co); /* now calculate sin(phi) because cos(phi) is symmetric, so we can decide between +-phi. */ ux = ay * bz - az * by; uy = az * bx - ax * bz; uz = ax * by - ay * bx; vx = by * cz - bz * cy; vy = bz * cx - bx * cz; vz = bx * cy - by * cx; dx1 = uy * vz - uz * vy; dy1 = uz * vx - ux * vz; dz1 = ux * vy - uy * vx; dx1 = DOT(dx1, dy1, dz1, bx, by, bz); if (dx1 < 0.0) phi = -phi; delta = phi - Phase[atyp]; delta = delta > pi ? pi : delta; delta = delta < -pi ? -pi : delta; df = Pk[atyp] * delta; e = df * delta; e_tors += e; yy = sin(phi); /* Decide what expansion to use Check first for the "normal" expression, since it will be the most used the 0.001 value could be lowered for increased precision. This insures ~1e-05% error for sin(phi)=0.001 */ if (fabs(yy) > 0.001) { df = -2.0 * df / yy; } else { if (fabs(delta) < 0.10) { if (Phase[atyp] == 0.0) { df = -2.0 * Pk[atyp] * (1 + phi * phi / 6.0); } else { if (fabs(Phase[atyp]) == pi) { df = 2.0 * Pk[atyp] * (1 + delta * delta / 6.0); } } } else { if ((phi > 0.0 && phi < (pi / 2.0)) || (phi < 0.0 && phi > -pi / 2.0)) df = df * 1000.; else df = -df * 1000.; } } } else { multi_term: if (fabs(Phase[atyp] - 3.142) < 0.01) phase = -1.0; else phase = 1.0; ktors = Pk[atyp]; switch ((int) fabs(Pn[atyp])) { case 1: e = ktors * (1.0 + phase * co); df = phase * ktors; break; case 2: e = ktors * (1.0 + phase * (2.*co*co -1.)); df = phase*ktors*4.*co; break; case 3: cosq = co*co; e = ktors * (1.0 + phase * co*(4.*cosq -3.)); df = phase*ktors*(12.*cosq - 3.); break; case 4: cosq = co*co; e = ktors * (1.0 + phase * (8.*cosq*(cosq - 1.) + 1.)); df = phase*ktors*co*( 32.*cosq -16.); break; case 6: cosq = co*co; e = ktors * (1.0 + phase * (32.*cosq*cosq*cosq - 48.*cosq*cosq + 18.*cosq - 1.)); df = phase*ktors*co*( 192.*cosq*cosq - 192.*cosq + 36. ); break; default: fprintf(stderr, "bad value for Pn: %d %d %d %d %8.3f\n", at1, at2, at3, at4, Pn[atyp]); exit(1); } e_tors += e; } f[at1 + 0] += df * dd1x; f[at1 + 1] += df * dd1y; f[at1 + 2] += df * dd1z; f[at2 + 0] += df * dd2x; f[at2 + 1] += df * dd2y; f[at2 + 2] += df * dd2z; f[at3 + 0] += df * dd3x; f[at3 + 1] += df * dd3y; f[at3 + 2] += df * dd3z; f[at4 + 0] += df * dd4x; f[at4 + 1] += df * dd4y; f[at4 + 2] += df * dd4z; #ifdef PRINT_EPHI printf("%4d %4d %4d %4d %4d %9.4f\n", i + 1, at1 / 3, at2 / 3, at3 / 3, at4 / 3, e); printf("%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f%10.5f\n", -df * dd1x, -df * dd1y, -df * dd1z, -df * dd2x, -df * dd2y, -df * dd2z, -df * dd3x, -df * dd3y); printf("%10.5f%10.5f%10.5f%10.5f\n", -df * dd3z, -df * dd4x, -df * dd4y, -df * dd4z); #endif if (Pn[atyp] < 0) { atyp++; goto multi_term; } } if (e_debug) EXPR("%9.3f", e_tors); return (e_tors); } REAL_T eimphi( int nphi, int *a1, int *a2, int *a3, int *a4, int *atype, REAL_T *Pk, REAL_T *Peq, REAL_T *x, REAL_T *f ) { /* calculate improper dihedral energies and forces according to T.Schlick, J.Comput.Chem, vol 10, 7 (1989) with local variant (no delta funcion involved) If |sin(phi)|.gt.1e-03 use the angle directly V = kimp ( phi -impeq ) ^2 dV/dcos(phi) = -2*k/sin(phi) If |sin(phi)|.lt.1e-03 and (phi-Peq).lt.0.1 and cnseq(phi)=0. V = kimp (phi - impeq)^2 dV/dcos(phi) = -2*k*(1+phi*phi/6.) If |sin(phi)|.lt.1e-03 and (phi-Peq).lt.0.1 and cnseq(phi)=pi V = kimp (phi - impeq)^2 dV/dcos(phi) = 2*k*(1+phi*phi/6.) Now, if |sin(phi)|.lt.1e-03 but far from minimum, set sin(phi) to 1e-03 arbitrarily */ REAL_T e; REAL_T co, phi, den, co1; REAL_T uu, vv, uv; REAL_T ax, bx, cx; REAL_T ay, by, cy; REAL_T az, bz, cz; REAL_T ux, uy, uz; REAL_T vx, vy, vz; REAL_T dx1, dy1, dz1; REAL_T a0x, b0x, c0x; REAL_T a0y, b0y, c0y; REAL_T a0z, b0z, c0z; REAL_T a1x, b1x; REAL_T a1y, b1y; REAL_T a1z, b1z; REAL_T a2x, b2x; REAL_T a2y, b2y; REAL_T a2z, b2z; REAL_T dd1x, dd2x, dd3x, dd4x; REAL_T dd1y, dd2y, dd3y, dd4y; REAL_T dd1z, dd2z, dd3z, dd4z; REAL_T df, delta, pi, yy; REAL_T aa, bb, cc, ab, bc, ac; REAL_T e_imp; int i; int at1, at2, at3, at4, atyp; pi = 3.1415927; e_imp = 0.0; for (i = 0; i < nphi; i++) { at1 = a1[i]; at2 = a2[i]; at3 = abs(a3[i]); at4 = abs(a4[i]); atyp = atype[i] - 1; /* calculate distances */ ax = x[at2 + 0] - x[at1 + 0]; ay = x[at2 + 1] - x[at1 + 1]; az = x[at2 + 2] - x[at1 + 2]; bx = x[at3 + 0] - x[at2 + 0]; by = x[at3 + 1] - x[at2 + 1]; bz = x[at3 + 2] - x[at2 + 2]; cx = x[at4 + 0] - x[at3 + 0]; cy = x[at4 + 1] - x[at3 + 1]; cz = x[at4 + 2] - x[at3 + 2]; # define DOT(a,b,c,d,e,f) a*d + b*e + c*f ab = DOT(ax, ay, az, bx, by, bz); bc = DOT(bx, by, bz, cx, cy, cz); ac = DOT(ax, ay, az, cx, cy, cz); aa = DOT(ax, ay, az, ax, ay, az); bb = DOT(bx, by, bz, bx, by, bz); cc = DOT(cx, cy, cz, cx, cy, cz); uu = (aa * bb) - (ab * ab); vv = (bb * cc) - (bc * bc); uv = (ab * bc) - (ac * bb); den = 1.0 / sqrt(fabs(uu * vv)); co = uv * den; co = co > 1.0 ? 1.0 : co; co = co < -1.0 ? -1.0 : co; phi = acos(co); /* now calculate sin(phi) because cos(phi) is symmetric, so we can decide between +-phi. */ ux = ay * bz - az * by; uy = az * bx - ax * bz; uz = ax * by - ay * bx; vx = by * cz - bz * cy; vy = bz * cx - bx * cz; vz = bx * cy - by * cx; dx1 = uy * vz - uz * vy; dy1 = uz * vx - ux * vz; dz1 = ux * vy - uy * vx; dx1 = DOT(dx1, dy1, dz1, bx, by, bz); if (dx1 < 0.0) phi = -phi; delta = phi - Peq[atyp]; delta = delta > pi ? pi : delta; delta = delta < -pi ? -pi : delta; df = Pk[atyp] * delta; e = df * delta; e_imp += e; yy = sin(phi); /* Decide what expansion to use Check first for the "normal" expression, since it will be the most used the 0.001 value could be lowered for increased precision. This insures ~1e-05% error for sin(phi)=0.001 */ if (fabs(yy) > 0.001) { df = -2.0 * df / yy; } else { if (fabs(delta) < 0.10) { if (Peq[atyp] == 0.0) { df = -2.0 * Pk[atyp] * (1 + phi * phi / 6.0); } else { if (fabs(Peq[atyp]) == pi) df = 2.0 * Pk[atyp] * (1 + delta * delta / 6.0); } } else { if ((phi > 0.0 && phi < (pi / 2.0)) || (phi < 0.0 && phi > -pi / 2.0)) df = df * 1000.; else df = -df * 1000.; } } co1 = 0.5 * co * den; a0x = -bc * bx + bb * cx; a0y = -bc * by + bb * cy; a0z = -bc * bz + bb * cz; b0x = ab * cx + bc * ax - 2. * ac * bx; b0y = ab * cy + bc * ay - 2. * ac * by; b0z = ab * cz + bc * az - 2. * ac * bz; c0x = ab * bx - bb * ax; c0y = ab * by - bb * ay; c0z = ab * bz - bb * az; a1x = 2. * uu * (-cc * bx + bc * cx); a1y = 2. * uu * (-cc * by + bc * cy); a1z = 2. * uu * (-cc * bz + bc * cz); b1x = 2. * uu * (bb * cx - bc * bx); b1y = 2. * uu * (bb * cy - bc * by); b1z = 2. * uu * (bb * cz - bc * bz); a2x = -2. * vv * (bb * ax - ab * bx); a2y = -2. * vv * (bb * ay - ab * by); a2z = -2. * vv * (bb * az - ab * bz); b2x = 2. * vv * (aa * bx - ab * ax); b2y = 2. * vv * (aa * by - ab * ay); b2z = 2. * vv * (aa * bz - ab * az); dd1x = (a0x - a2x * co1) * den; dd1y = (a0y - a2y * co1) * den; dd1z = (a0z - a2z * co1) * den; dd2x = (-a0x - b0x - (a1x - a2x - b2x) * co1) * den; dd2y = (-a0y - b0y - (a1y - a2y - b2y) * co1) * den; dd2z = (-a0z - b0z - (a1z - a2z - b2z) * co1) * den; dd3x = (b0x - c0x - (-a1x - b1x + b2x) * co1) * den; dd3y = (b0y - c0y - (-a1y - b1y + b2y) * co1) * den; dd3z = (b0z - c0z - (-a1z - b1z + b2z) * co1) * den; dd4x = (c0x - b1x * co1) * den; dd4y = (c0y - b1y * co1) * den; dd4z = (c0z - b1z * co1) * den; f[at1 + 0] += df * dd1x; f[at1 + 1] += df * dd1y; f[at1 + 2] += df * dd1z; f[at2 + 0] += df * dd2x; f[at2 + 1] += df * dd2y; f[at2 + 2] += df * dd2z; f[at3 + 0] += df * dd3x; f[at3 + 1] += df * dd3y; f[at3 + 2] += df * dd3z; f[at4 + 0] += df * dd4x; f[at4 + 1] += df * dd4y; f[at4 + 2] += df * dd4z; } return (e_imp); } /* * The downheap function from Robert Sedgewick's "Algorithms in C++" p. 152, * corrected for the fact that Sedgewick indexes the heap array from 1 to n * whereas Java indexes the heap array from 0 to n-1. Note, however, that * the heap should be indexed conceptually from 1 to n in order that for * any node k the two children are found at nodes 2*k and 2*k+1. Move down * the heap, exchanging the node at position k with the larger of its two * children if necessary and stopping when the node at k is larger than both * children or the bottom of the heap is reached. Note that it is possible * for the node at k to have only one child: this case is treated properly. * A full exchange is not necessary because the variable 'v' is involved in * the exchanges. The 'while' loop has two exits: one for the case that * the bottom of the heap is hit, and another for the case that the heap * condition (the parent is greater than or equal to both children) is * satisfied somewhere in the interior of the heap. * * Calling parameters are as follows: * * a - array of indices into the atomic coordinate array x * n - the number of items to be sorted * k - the exchange node (or element) * x - the atomic coordinate array * p - the partition (x, y or z) on which sorting occurs */ static void downheap(int *a, int n, int k, REAL_T *x, int p) { int j, v; v = a[k-1]; while (k <= n/2) { j = k + k; if ( (j < n) && (x[3 * a[j-1] + p] < x[3 * a[j] + p]) ) j++; if (x[3 * v + p] >= x[3 * a[j-1] + p]) break; a[k-1] = a[j-1]; k = j; } a[k-1] = v; } /* * The heapsort function from Robert Sedgewick's "Algorithms in C++" p. 156, * corrected for the fact that Sedgewick indexes the heap array from 1 to n * whereas Java indexes the heap array from 0 to n-1. Note, however, that * the heap should be indexed conceptually from 1 to n in order that for * any node k the two children are found at nodes 2*k and 2*k+1. In what * follows, the 'for' loop heaporders the array in linear time and the * 'while' loop exchanges the largest element with the last element then * repairs the heap. * * Calling parameters are as follows: * * a - array of indices into the atomic coordinate array x * n - the number of items to be sorted * x - the atomic coordinate array * p - the partition (x, y or z) on which sorting occurs */ static void heapsort(int *a, int n, REAL_T *x, int p) { int k, v; for (k = n/2; k >= 1; k--) downheap(a, n, k, x, p); while (n > 1) { v = a[0]; a[0] = a[n-1]; a[n-1] = v; downheap(a, --n, 1, x, p); } } /* * Build the kd tree by recursively subdividing the atom number * arrays and adding nodes to the tree. Note that the arrays * are permuted cyclically as control moves down the tree in * order that sorting occur on x, y and z. The temporary array * is provided to facilitate the copy and partition operation. * * Calling parameters are as follows: * * xn - x sorted array of atom numbers * yn - y sorted array of atom numbers * zn - z sorted array of atom numbers * tn - temporary array for atom numbers * start - first element of array * end - last element of array * kdpptr - pointer to pointer to kd tree node next available for allocation * that - the node currently visited, the equivalent of 'this' in C++ * x - atomic coordinate array * p - the partition (x, y or z) on which sorting occurs */ static void buildkdtree(int *xn, int *yn, int *zn, int *tn, int start, int end, KDNODE **kdpptr, KDNODE *that, REAL_T *x, int p) { int i, middle, lower, upper; REAL_T median; /* The partition cycles modulo 3. */ p %= 3; /* If only one element is passed to this function, add it to the tree. */ if (end == start) { that->n = xn[start]; } /* * Otherwise, if two elements are passed to this function, determine * whether the first element is the low child or the high child. * Allocate a new KDNODE and make it one or the other of the children. */ else if (end == start + 1) { that->n = xn[end]; (*kdpptr)->n = xn[start]; (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; if (x[3 * xn[start] + p] < x[3 * xn[end] + p]) { that->lo = (*kdpptr)++; } else { that->hi = (*kdpptr)++; } } /* Otherwise, more than two elements are passed to this function. */ else { /* * The middle element of the xn array is taken as the element about * which the yn and zn arrays will be partitioned. However, search * lower elements of the xn array to ensure that the x values of the * atomic coordinate array that correspond to these elements are indeed * less than the median value because they may be equal to it. This * approach is consistent with partitioning between < and >=. */ middle = (start + end)/2; median = x[3 * xn[middle] + p]; for (i = middle - 1; i >= start; i--) { if (x[3 * xn[i] + p] < median) { break; } else { middle = i; } } /* Store the middle element at this kd node. */ that->n = xn[middle]; /* * Scan the yn array in ascending order and compare the x value of * each corresponding element of the atomic coordinate array to the * median value. If the x value is less than the median value, copy * the element of the yn array into the lower part of the tn array. * If the x value is greater than or equal to the median value, copy * the element of the yn array into the upper part of the tn array. * The lower part of the tn array begins with the start index, and the * upper part of the tn array begins one element above the middle index. * At the end of this scan and copy operation, the tn array will have * been subdivided into three groups: (1) a group of indices beginning * with start and continuing up to but not including middle, which indices * point to atoms for which the x value is less than the median value; * (2) the middle index that has been stored in this node of the kd tree; * and (3) a group of indices beginning one address above middle and * continuing up to and including end, which indices point to atoms for * which the x value is greater than or equal to the median value. * * This approach preserves the relative heapsorted order of elements * of the atomic coordinate array that correspond to elements of the * yn array while those elements are partitioned about the x median. * * Note: when scanning the yn array, skip the element (i.e., the atom * number) that equals the middle element because that atom number has * been stored at this node of the kd-tree. */ lower = start - 1; upper = middle; for (i = start; i <= end; i++) { if (yn[i] != xn[middle]) { if (x[3 * yn[i] + p] < median) { tn[++lower] = yn[i]; } else { tn[++upper] = yn[i]; } } } /* Ensure that all yn array elements have been copied . */ #ifdef KDTREE_DEBUG if (lower != middle - 1) { printf("yn: lower = %d doesn't equal middle = %d - 1\n", lower, middle); exit (1); } if (upper != end) { printf("yn: upper = %d doesn't equal end = %d\n", upper, end); exit (1); } #endif /* * All elements of the yn array between start and end have been copied * and partitioned into the tn array, so the yn array is available for * elements of the zn array to be copied and partitioned into the yn * array, in the same manner in which elements of the yn array were * copied and partitioned into the tn array. * * This approach preserves the relative heapsorted order of elements * of the atomic coordinate array that correspond to elements of the * zn array while those elements are partitioned about the x median. * * Note: when scanning the zn array, skip the element (i.e., the atom * number) that equals the middle element because that atom number has * been stored at this node of the kd-tree. */ lower = start - 1; upper = middle; for (i = start; i <= end; i++) { if (zn[i] != xn[middle]) { if (x[3 * zn[i] + p] < median) { yn[++lower] = zn[i]; } else { yn[++upper] = zn[i]; } } } /* Ensure that all zn array elements have been copied . */ #ifdef KDTREE_DEBUG if (lower != middle - 1) printf("zn: lower = %d doesn't equal middle = %d - 1\n", lower, middle); if (upper != end) printf("zn: upper = %d doesn't equal end = %d\n", upper, end); #endif /* * Recurse down the lo branch of the tree if the lower group of * the tn array is non-null. Note permutation of the xn, yn, zn * and tn arrays. In particular, xn was used for partitioning at * this level of the tree. At one level down the tree, yn (which * has been copied into tn) will be used for partitioning. At two * levels down the tree, zn (which has been copied into yn) will * be used for partitioning. At three levels down the tree, xn * will be used for partitioning. In this manner, partitioning * cycles through xn, yn and zn at successive levels of the tree. */ if (lower >= start) { (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->lo = (*kdpptr)++; buildkdtree(tn, yn, xn, zn, start, lower, kdpptr, that->lo, x, p+1); } /* * Recurse down the hi branch of the tree if the upper group of * the tn array is non-null. Note permutation of the xn, yn, zn * and tn arrays, as explained above for recursion down the lo * branch of the tree. */ if (upper > middle) { (*kdpptr)->lo = NULL; (*kdpptr)->hi = NULL; that->hi = (*kdpptr)++; buildkdtree(tn, yn, xn, zn, middle+1, end, kdpptr, that->hi, x, p+1); } } } #ifdef KDTREE_DEBUG /* * Check that all children are < the reference kd node. * * Calling parameters are as follows: * * parent - the node against which all children are compared * that - the node currently visited, equivalent to 'this' in C++ * x - atomic coordinate array * p - the partition (x, y or z) on which sorting occurs */ static void checkkdlo(KDNODE *parent, KDNODE *that, REAL_T *x, int p) { /* Check the current kdnode against its parent. */ if (x[3 * that->n + p] > x[3 * parent->n + p]) { printf("Error in checkkdlo! x[%d] = %f > x[%d] = %f\n", 3 * that->n + p, x[3 * that->n + p], 3 * parent->n + p, x[3 * parent->n + p]); exit (1); } /* Check the children against their grandparent. */ if (that->lo != NULL) { checkkdlo(parent, that->lo, x, p); } if (that->hi != NULL) { checkkdlo(parent, that->hi, x, p); } } /* * Check that all children are >= the reference kd node. * * Calling parameters are as follows: * * parent - the node against which all children are compared * that - the node currently visited, equivalent to 'this' in C++ * x - atomic coordinate array * p - the partition (x, y or z) on which sorting occurs */ static void checkkdhi( KDNODE *parent, KDNODE *that, REAL_T *x, int p ) { /* Check the current kdnode against its parent. */ if (x[3 * parent->n + p] > x[3 * that->n + p]) { printf("Error in checkkdhi! x[%d] = %f > x[%d] = %f\n", 3 * parent->n + p, 3 * x[parent->n + p], 3 * that->n + p, 3 * x[that->n + p]); exit (1); } /* Check the children against their grandparent. */ if (that->lo != NULL) { checkkdhi(parent, that->lo, x, p); } if (that->hi != NULL) { checkkdhi(parent, that->hi, x, p); } } /* * Walk the kd tree and call the check functions. All children on the * lo branch of the tree should be < the reference kd node. All children * on the hi branch of the tree should be >= the reference kd node. * * Calling parameters are as follows: * * that - the node currently visited, equivalent to 'this' in C++ * x - atomic coordinate array * p - the partition (x, y or z) on which sorting occurs */ static void checkkdtree( KDNODE *that, REAL_T *x, int p ) { p %= 3; if (that->lo != NULL) { checkkdtree(that->lo, x, p+1); checkkdlo(that, that->lo, x, p); } if (that->hi != NULL) { checkkdtree(that->hi, x, p+1); checkkdhi(that, that->hi, x, p); } } #endif /* * Walk the kd tree and generate the pair lists for the upper and lower * triangles. The pair lists are not ordered in the atom number. * * Calling parameters are as follows: * * that - the node currently visited, equivalent to 'this' in C++ * x - atomic coordinate array * p - the partition (x, y or z) on which sorting occurs * q - the query atom number * lopairs - pair count array indexed by atom for the lower triangle * uppairs - pair count array indexed by atom for the upper triangle * lopairlist - the pair list for the lower triangle * uppairlist - the pair list for the upper triangle */ static void searchkdtree( KDNODE *that, REAL_T *x, int p, int q, int *lopairs, int *uppairs, int *lopairlist, int *uppairlist ) { /* The partition cycles modulo 3. */ p %= 3; /* * Search the low branch of the tree if the atomic coordinate of the * query atom minus the cutoff radius less than the atomic coordinate * of the kd node atom. */ if ( (that->lo != NULL) && (x[3 * q + p] - cut < x[3 * that->n + p]) ) { searchkdtree(that->lo, x, p+1, q, lopairs, uppairs, lopairlist, uppairlist); } /* * If the query atom number does not equal the kd tree node atom number * and at least one of the two atoms is not frozen, calculate the interatomic * distance and add the kd tree node atom to one of the pair lists if the * distance is less than the cutoff distance. The atom belongs on the lower * triangle pair list if the atom number is less than the query node atom * number. Otherwise, it belongs on the upper triangle pair list. */ if ( ( q != that->n ) && ( !frozen[q] || !frozen[that->n] ) ){ REAL_T xij = x[3 * q + 0] - x[3 * that->n + 0]; REAL_T yij = x[3 * q + 1] - x[3 * that->n + 1]; REAL_T zij = x[3 * q + 2] - x[3 * that->n + 2]; REAL_T r2 = xij * xij + yij * yij + zij * zij; if (r2 < cut2) { if (that->n < q) { lopairlist[lopairs[q]++] = that->n; } else { uppairlist[uppairs[q]++] = that->n; } } } /* * Search the high branch of the tree if the atomic coordinate of the * query atom plus the cutoff radius less than than the atomic coordinate * of the kd node atom. */ if ( (that->hi != NULL) && (x[3 * q + p] + cut >= x[3 * that->n + p]) ) { searchkdtree(that->hi, x, p+1, q, lopairs, uppairs, lopairlist, uppairlist); } } /* * Create the non-bonded pairlist using a kd-tree. The kd-tree nodes * are allocated from the kdtree array. * * Calling parameters are as follows: * * x - atomic coordinate array * lopairs - pair count array indexed by atom for the lower triangle * uppairs - pair count array indexed by atom for the upper triangle * lostart - beginning address indexed by atom for the lower triangle * upstart - beginning address indexed by atom for the upper triangle * pairlist - the pair list for the lower and upper triangles * * This function returns the total number of pairs. */ int nblist( REAL_T *x, int *lopairs, int *uppairs, int* lostart, int* upstart, int *bothpairlist ) { int i, j; int *xn, *yn, *zn, *tn; int totpair = 0; KDNODE *kdtree, *kdptr; /* Square the cutoff distance for use in searchkdtree. */ cut2 = cut * cut; /* Allocate the kdtree array that must hold one node per atom. */ if ( (kdtree = (KDNODE *) malloc(prm->Natom*sizeof(KDNODE))) == NULL ) { printf("Error allocate kdnode array in nbtree!\n"); exit (0); } /* * Allocate, initialize and sort the arrays that hold the results of the * heapsort on x,y,z. These arrays are used as pointers (via array indices) * into the atomic coordinate array x. Allocate an additional temp array * so that the buildkdtree function can cycle through x,y,z. */ xn = ivector(0, prm->Natom); yn = ivector(0, prm->Natom); zn = ivector(0, prm->Natom); tn = ivector(0, prm->Natom); for (i = 0; i < prm->Natom; i++) { xn[i] = yn[i] = zn[i] = i; } heapsort(xn, prm->Natom, x, 0); heapsort(yn, prm->Natom, x, 1); heapsort(zn, prm->Natom, x, 2); #ifdef KDTREE_DEBUG /* Verify that heapsort functioned correctly. */ for (i = 1; i < prm->Natom; i++) { if (x[3 * xn[i] + 0] < x[3 * xn[i-1] + 0]) printf("Error in x heapsort! x[%d] = %f x[%d] = %f\n", i, x[3 * xn[i] + 0], i, x[3 * xn[i-1] + 0]); if (x[3 * yn[i] + 1] < x[3 * yn[i-1] + 1]) printf("Error in y heapsort! y[%d] = %f y[%d] = %f\n", i, x[3 * yn[i] + 1], i, x[3 * yn[i-1] + 1]); if (x[3 * zn[i] + 2] < x[3 * zn[i-1] + 2]) printf("Error in z heapsort! z[%d] = %f z[%d] = %f\n", i, x[3 * zn[i] + 2], i, x[3 * zn[i-1] + 2]); } #endif /* Build the kd tree. */ kdptr = kdtree; KDNODE *root = kdptr++; root->lo = NULL; root->hi = NULL; buildkdtree(xn, yn, zn, tn, 0, prm->Natom-1, &kdptr, root, x, 0); /* * Search the kd tree with each atom. Use xn and yn as temporary * arrays for the lower and upper triangle pair lists for one atom. */ for (i = 0; i < prm->Natom; i++) { lostart[i] = totpair; lopairs[i] = uppairs[i] = 0; searchkdtree(root, x, 0, i, lopairs, uppairs, xn, yn); for (j = 0; j < lopairs[i]; j++) { if (totpair >= maxnb) { printf("maximum pairlist size exceeded by lower triangle\n"); exit(1); } else { bothpairlist[totpair++] = xn[j]; } } upstart[i] = totpair; for (j = 0; j < uppairs[i]; j++) { if (totpair >= maxnb) { printf("maximum pairlist size exceeded by upper triangle\n"); exit(1); } else { bothpairlist[totpair++] = yn[j]; } } } #ifdef KDTREE_DEBUG /* Check that the kd tree is built correctly. */ checkkdtree(root, x, 0); /* * Verify that every atom j on the lower triangle pair list has * an atom number less than i. */ for (i = 0; i < prm->Natom; i++) { for (j = 0; j < lopairs[i]; j++) { if (bothpairlist[lostart[i] + j] > i) { printf("Lower triangle j = %d > i = %d\n", bothpairlist[lostart[i] + j], i); exit(1); } } } /* * Verify that every atom j on the upper triangle pair list has * an atom number greater than i. */ for (i = 0; i < prm->Natom; i++) { for (j = 0; j < uppairs[i]; j++) { if (bothpairlist[upstart[i] + j] < i) { printf("Upper triangle j = %d < i = %d\n", bothpairlist[upstart[i] + j], i); exit(1); } } } /* Verify that every atom on the pair list has a valid atom number. */ for (i = 0; i < prm->Natom; i++) { for (j = lostart[i]; j < lostart[i] + lopairs[i] + uppairs[i]; j++) { if (bothpairlist[j] < 0 || bothpairlist[j] >= prm->Natom) { printf("pairlist[%d] = %d is not a valid atom number!\n", j, bothpairlist[j]); exit(1); } } } /* Verify that every atom on the pair list is within the cutoff radius. */ for (i = 0; i < prm->Natom; i++) { REAL_T xi = x[3 * i ]; REAL_T yi = x[3 * i + 1]; REAL_T zi = x[3 * i + 2]; for (j = 0; j < lopairs[i] + uppairs[i]; j++) { REAL_T xij = xi - x[3 * bothpairlist[lostart[i] + j] ]; REAL_T yij = yi - x[3 * bothpairlist[lostart[i] + j] + 1]; REAL_T zij = zi - x[3 * bothpairlist[lostart[i] + j] + 2]; REAL_T r2 = xij * xij + yij * yij + zij * zij; if (r2 >= cut2) { printf("Atom %d is not within the cutoff radius from atom %d\n", bothpairlist[lostart[i] + j], i); exit(1); } } } /* * Verify that every atom not on the pair lists is not within the * cutoff radius. Set every element of the xn array to -1. Then * for every atom i, mark the atoms j on the pairlist of i by setting * xn[j] to i. Mark also atom i by setting xn[i] to i because atom i * is always within its own cutoff radius. Now the atoms j that are * not on the pairlist of atom i may be identified because xn[j] is * not equal to i. For example, xn[j] could be equal to i-1 because * it was marked from the pairlist of atom i-1. Marked atoms remain * validly marked until the entire process begins again with the first i. */ for (i = 0; i < prm->Natom; i++) { xn[i] = -1; } for (i = 0; i < prm->Natom; i++) { REAL_T xi = x[3 * i ]; REAL_T yi = x[3 * i + 1]; REAL_T zi = x[3 * i + 2]; for (j = 0; j < lopairs[i] + uppairs[i]; j++) { xn[bothpairlist[lostart[i] + j]] = i; } xn[i] = i; for (j = 0; j < prm->Natom; j++) { if (xn[j] != i) { REAL_T xij = xi - x[3 * j ]; REAL_T yij = yi - x[3 * j + 1]; REAL_T zij = zi - x[3 * j + 2]; REAL_T r2 = xij * xij + yij * yij + zij * zij; if (r2 < cut2) { printf("Atom %d is within the cutoff radius from atom %d\n", xn[j], i); exit(1); } } } } #endif /* Free the temporary arrays. */ free(kdtree); free_ivector(xn, 0, prm->Natom); free_ivector(yn, 0, prm->Natom); free_ivector(zn, 0, prm->Natom); free_ivector(tn, 0, prm->Natom); return totpair; } /* * Calculate the non-bonded energy and first derivatives. * This function is complicated by the fact that it must * process two forms of pair lists: the 1-4 pair list and * the non-bonded pair list. The non-bonded pair list * must be modified by the excluded atom list whereas the * 1-4 pair list is used unmodified. Also, the non-bonded * pair list comprises lower and upper triangles whereas * the 1-4 pair list comprises an upper triangle only. * * Calling parameters are as follows: * * npairs - the number of pairs on either pair list * pairlist - either the 1-4 pair list or the non-bonded pair list * offset - the starting address of the upper triangle; ignored * for the 1-4 pair list * non14 - set to 1 for the non-bonded pair list, 0 for the 1-4 pair list * x - the atomic coordinate array * f - the gradient vector * enb - Van der Waals energy return value, passed by reference * eel - Coulombic energy return value, passed by reference * enbfac - scale factor for Van der Waals energy * eelfac - scale factor for Coulombic energy */ int nbond( int *npairs, int *pairlist, int *offset, int non14, REAL_T *x, REAL_T *f, REAL_T *enb, REAL_T *eel, REAL_T enbfac, REAL_T eelfac ) { int i, j, jn, ic, npr, lpair, iaci, iexcl, nexcl; int *iexw; REAL_T dumx, dumy, dumz, cgi, r2inv, df2, r6, r10, f1, f2; REAL_T dedx, dedy, dedz, df, enbfaci, eelfaci; REAL_T xi, yi, zi, xij, yij, zij, r, r2; REAL_T dis, kij, d0, diff, rinv, rs, rssq, eps1, epsi, cgijr, pow; int nhbpair, ibig, isml; #if 0 REAL_T hbener, nb14; nhbpair = 0; hbener = 0.0; nb14 = 0.0; #endif #define SIG 0.3 #define DIW 78.0 #define C1 38.5 *enb = 0.; *eel = 0.; enbfaci = 1. / enbfac; eelfaci = 1. / eelfac; /* Initialize offsets into the pair list and the excluded atom list. */ lpair = 0; iexcl = 0; /* * Allocate and initialize the iexw array used for skipping excluded * atoms. Note that because of the manner in which iexw is used, it * is necessary to initialize it before only the first iteration of * the following loop. */ iexw = ivector(-1, prm->Natom); for (i = -1; i < prm->Natom; i++) { iexw[i] = -1; } /* Loop over all atoms i except for the final atom i. */ for (i = 0; i < prm->Natom - 1; i++) { /* * If the 'non14' calling parameter is set, ignore the offset * into the pair list that accumulates with each iteration of * the i-loop, and use instead the beginning address of the upper * triangle of the pair list. */ if (non14 != 0) { lpair = offset[i]; } /* Check whether there are any atoms j on the pair list of atom i. */ nexcl = prm->Iblo[i]; npr = npairs[i]; if (npr > 0) { iaci = prm->Ntypes * (prm->Iac[i] - 1); dumx = 0.0; dumy = 0.0; dumz = 0.0; xi = x[3 * i + 0]; yi = x[3 * i + 1]; zi = x[3 * i + 2]; cgi = eelfaci * prm->Charges[i]; /* * Expand the excluded list into the iexw array by storing i * at array address j. */ for (j = 0; j < nexcl; j++) { iexw[prm->ExclAt[iexcl + j] - 1] = i; } /* Select atoms j from the pair list. */ for (jn = 0; jn < npr; jn++) { j = pairlist[jn + lpair]; /* * If the 'non14' calling parameter is set, check whether * this i,j pair is exempted by the excluded atom list. */ if (non14 == 0 || iexw[j] != i) { xij = xi - x[3 * j + 0]; yij = yi - x[3 * j + 1]; zij = zi - x[3 * j + 2]; r2 = xij * xij + yij * yij + zij * zij; r2inv = 1.0 / r2; r = sqrt(r2); rinv = r * r2inv; /* Calculate the energy and derivatives according to dield. */ if (dield == -3) { /* special code Ramstein & Lavery dielectric, 94 force field */ rs = SIG * r; rssq = rs * rs; pow = exp(-rs); eps1 = rssq + rs + rs + 2.0; epsi = 1.0 / (DIW - C1 * pow * eps1); cgijr = cgi * prm->Charges[j] * rinv * epsi; *eel += cgijr; df2 = -cgijr * (1.0 + C1 * pow * rs * rssq * epsi); ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6 = r2inv * r2inv * r2inv; f2 = prm->Cn2[ic] * r6; f1 = prm->Cn1[ic] * r6 * r6; *enb += (f1 - f2) * enbfaci; df = (df2 + (6.0 * f2 - 12.0 * f1) * enbfaci) * rinv; } else { df = df2 * rinv; } } else if (dield == -4) { /* distance-dependent dielectric code, 94 ff */ /* epsilon = r */ rs = cgi * prm->Charges[j] * r2inv; df2 = -2.0 * rs; *eel += rs; ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6 = r2inv * r2inv * r2inv; f2 = prm->Cn2[ic] * r6; f1 = prm->Cn1[ic] * r6 * r6; *enb += (f1 - f2) * enbfaci; df = (df2 + (6.0 * f2 - 12.0 * f1) * enbfaci) * rinv; } else { df = df2 * rinv; } } else if (dield == -5) { /* non-bonded term from yammp */ dis = r; ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; d0 = prm->Cn2[ic]; if (dis < d0) { kij = prm->Cn1[ic]; diff = dis - d0; *enb += kij * diff * diff; df = 2.0 * kij * diff; } else { df = 0.0; } } else { /* * Code for various dielectric models. * The df2 variable should hold r(dV/dr). */ if (dield == 0) { /* epsilon = r */ rs = cgi * prm->Charges[j] * r2inv; df2 = -2.0 * rs; *eel += rs; } else if (dield == 1) { /* epsilon = 1 */ rs = cgi * prm->Charges[j] * rinv; df2 = -rs; *eel += rs; } else if (dield == -2) { /* Ramstein & Lavery dielectric, PNAS 85, 7231 (1988). */ rs = SIG * r; rssq = rs * rs; pow = exp(-rs); eps1 = rssq + rs + rs + 2.0; epsi = 1.0 / (DIW - C1 * pow * eps1); cgijr = cgi * prm->Charges[j] * rinv * epsi; *eel += cgijr; df2 = -cgijr * (1.0 + C1 * pow * rs * rssq * epsi); } /* Calculate either Van der Waals or hydrogen bonded term. */ ic = prm->Cno[iaci + prm->Iac[j] - 1]; if (ic > 0 || enbfac != 1.0) { if (ic > 0) { ic--; } else { ibig = prm->Iac[i] > prm->Iac[j] ? prm->Iac[i] : prm->Iac[j]; isml = prm->Iac[i] > prm->Iac[j] ? prm->Iac[j] : prm->Iac[i]; ic = ibig * (ibig - 1) / 2 + isml - 1; } r6 = r2inv * r2inv * r2inv; f2 = prm->Cn2[ic] * r6; f1 = prm->Cn1[ic] * r6 * r6; *enb += (f1 - f2) * enbfaci; df = (df2 + (6.0 * f2 - 12.0 * f1) * enbfaci) * rinv; #if 0 if( enbfac != 1.0 ) nb14 += (f1-f2)*enbfaci; #endif } else { ic = -ic - 1; r10 = r2inv * r2inv * r2inv * r2inv * r2inv; f2 = prm->HB10[ic] * r10; f1 = prm->HB12[ic] * r10 * r2inv; *enb += (f1 - f2) * enbfaci; df = (df2 + (10.0 * f2 - 12.0 * f1) * enbfaci) * rinv; nhbpair++; #if 0 hbener += (f1-f2)*enbfaci; #endif } } /* * Divide by r here instead dividing the derivatives by r, * and update the gradient for atom j. */ df *= rinv; dedx = df * xij; dedy = df * yij; dedz = df * zij; dumx += dedx; dumy += dedy; dumz += dedz; f[3 * j + 0] -= dedx; f[3 * j + 1] -= dedy; f[3 * j + 2] -= dedz; } } /* For atom i, the gradient is updated in the i-loop only. */ f[3 * i + 0] += dumx; f[3 * i + 1] += dumy; f[3 * i + 2] += dumz; } /* Update the offsets into the pair list and the excluded atom list. */ lpair += npr; iexcl += nexcl; } /* Deallocate the iexw array. */ free_ivector(iexw, -1, prm->Natom); return (0); } /* * Calculate the generalized Born energy and first derivatives. * * Calling parameters are as follows: * * natom - input: the number of atoms * x - input: the atomic (x,y,z) coordinates * f - updated: the gradient vector * fs - input: overlap parameters * rborn - input: atomic radii * q - input: atomic charges * kappa - input: inverse of the Debye-Huckel length * diel_ext - input: solvent dielectric constant * enb - updated: Lennard-Jones energy * eelt - updated: gas-phase electrostatic energy */ REAL_T egb( int *natom, REAL_T *x, REAL_T *f, REAL_T *fs, REAL_T *rborn, REAL_T *q, REAL_T *kappa, REAL_T *diel_ext, REAL_T *enb, REAL_T *eelt ) #define BOFFSET 0.09 #define KSCALE 0.73 #define PIx4 12.5663706143591724639918 #define PIx2 6.2831853071795862319959 #define PIx1 3.1415926535897931159979 { #ifdef MULTIPLE_THREADS int iexcl_last; #endif int i, i3, j, j3, k; int iexcl, jexcl, jexcl_last, nexcl, npairs, ic, iaci; int *iexw; REAL_T epol, dielfac, qi, qj, qiqj, fgbi, fgbk, rb2, expmkf; REAL_T elec, evdw, sumda, daix, daiy, daiz; REAL_T xi, yi, zi, xij, yij, zij; REAL_T dedx, dedy, dedz, de; REAL_T dij1i, dij3i, temp1; REAL_T qi2h, qid2h, datmp; REAL_T theta, ri1i, dij2i; REAL_T dij, sumi; REAL_T eel, f6, f12, rinv, r2inv, r6inv; REAL_T r2, ri, rj, sj, sj2, thi; REAL_T uij, efac, temp4, temp5, temp6; REAL_T *reff, *sumdeijda, *sumepol, *sumelec, *sumevdw, *psi; /* LCPO stuff follows */ int count, count2, icount; REAL_T si, sumAij, sumAjk, sumAijAjk, sumdAijddijdxi; REAL_T sumdAijddijdyi, sumdAijddijdzi, sumdAijddijdxiAjk; REAL_T sumdAijddijdyiAjk, sumdAijddijdziAjk, rij, tmpaij, Aij, dAijddij; REAL_T dAijddijdxj, dAijddijdyj, dAijddijdzj; REAL_T sumdAjkddjkdxj, sumdAjkddjkdyj, sumdAjkddjkdzj, p3p4Aij; REAL_T xk, yk, zk, rjk2, djk1i, rjk, vdw2dif, tmpajk, Ajk, sumAjk2, dAjkddjk; REAL_T dAjkddjkdxj, dAjkddjkdyj, dAjkddjkdzj, lastxj, lastyj, lastzj; REAL_T dAidxj, dAidyj, dAidzj, Ai, dAidxi, dAidyi, dAidzi; REAL_T totsasa; REAL_T xj, yj, zj; REAL_T dumbo,tmpsd; REAL_T rgbmax1i, rgbmax2i, rgbmaxpsmax2; /* arrays and vars for use in vectorizable loops */ int icount2; REAL_T *vfgbi, *vefac,*r2x,*vrinv,*vexpmkf,*vectmp1; REAL_T kkscale,invdiel_ext; /*FGB taylor coefficients follow */ /* from A to H : */ /* 1/3 , 2/5 , 3/7 , 4/9 , 5/11 */ /* 4/3 , 12/5 , 24/7 , 40/9 , 60/11 */ #define TA 0.33333333333333333333 #define TB 0.4 #define TC 0.42857142857142857143 #define TD 0.44444444444444444444 #define TDD 0.45454545454545454545 #define TE 1.33333333333333333333 #define TF 2.4 #define TG 3.42857142857142857143 #define TH 4.44444444444444444444 #define THH 5.45454545454545454545 /* * Smooth "cut-off" in calculating GB effective radii. * Implementd by Andreas Svrcek-Seiler and Alexey Onufriev. * The integration over solute is performed up to rgbmax and includes * parts of spheres; that is an atom is not just "in" or "out", as * with standard non-bonded cut. As a result, calclated effective * radii are less than rgbmax. This saves time, and there is no * discontinuity in dReff/drij. * * Only the case rgbmax > 5*max(sij) = 5*fsmax ~ 9A is handled; this is * enforced in mdread(). Smaller values would not make much physical * sense anyway. * * Note: rgbmax must be less than or equal to cut so that the pairlist * generated from cut may be applied to calculation of the effective * radius and its derivatives. */ if (rgbmax > cut) { printf("Error in egb: rgbmax = %f is greater than cutoff = %f\n", rgbmax, cut); exit(1); } rgbmax1i = 1.0/rgbmax; rgbmax2i = rgbmax1i*rgbmax1i; rgbmaxpsmax2 = (rgbmax+prm->Fsmax)*(rgbmax+prm->Fsmax); kkscale = -(*kappa)*KSCALE; invdiel_ext = 1./(*diel_ext); /* allocate arrays for use in vectorizable loops */ vfgbi = vector(0, *natom); vefac = vector(0, *natom); r2x = vector(0, *natom); vrinv = vector(0, *natom); vexpmkf = vector(0, *natom); vectmp1 = vector(0, *natom); sumepol = vector(0, *natom); sumelec = vector(0, *natom); sumevdw = vector(0, *natom); /* Allocate and initialize the arrays used for energy and derivatives. */ if ( gb==2 || gb==5 ) psi = vector(0, *natom); reff = vector(0, *natom); sumdeijda = vector(0, *natom); for (i = 0; i < *natom; i++) { sumdeijda[i] = 0.0; sumepol[i] = 0.0; sumelec[i] = 0.0; sumevdw[i] = 0.0; } count = 0; totsasa = 0.0; /* * Get the "effective" Born radii via the approximate pairwise method * Use Eqs 9-11 of Hawkins, Cramer, Truhlar, J. Phys. Chem. 100:19824 * (1996). * * The computation is not executed in parallel for the case of gbsa==1 * due to the loop-carried dependence of the 'count' variable. */ if (gb_debug) printf("Effective Born radii:\n"); #ifdef MULTIPLE_THREADS #pragma omp parallel for if (gbsa != 1) \ private (i, xi, yi, zi, ri, ri1i, sumi, j, k, xij, yij, zij, \ r2, dij1i, dij, sj, sj2, uij, dij2i, tmpsd, dumbo, theta) #endif /* Loop over all atoms i. */ for (i = 0; i < *natom; i++) { xi = x[3 * i]; yi = x[3 * i + 1]; zi = x[3 * i + 2]; ri = rborn[i] - BOFFSET; ri1i = 1. / ri; sumi = 0.; /* Select atom j from the pair list. */ for (k = lprstr[i]; k < lprstr[i] + lpairs[i] + upairs[i]; k++) { j = pairlist[k]; xij = xi - x[3 * j]; yij = yi - x[3 * j + 1]; zij = zi - x[3 * j + 2]; r2 = xij * xij + yij * yij + zij * zij; if ( r2 > rgbmaxpsmax2 ) continue; dij1i = 1.0 / sqrt(r2); dij = r2 * dij1i; sj = fs[j] * (rborn[j] - BOFFSET); sj2 = sj * sj; /* * ---following are from the Appendix of Schaefer and Froemmel, * JMB 216:1045-1066, 1990; Taylor series expansion for d>>s * is by Andreas Svrcek-Seiler; smooth rgbmax idea is from * Andreas Svrcek-Seiler and Alexey Onufrie. */ if( dij > rgbmax + sj ) continue; if ((dij > rgbmax - sj)) { uij = 1./(dij -sj); sumi -= 0.125 * dij1i * (1.0 + 2.0 * dij *uij + rgbmax2i * (r2 - 4.0 * rgbmax * dij - sj2) + 2.0 * log((dij-sj)*rgbmax1i)); } else if (dij >4.0*sj) { dij2i = dij1i*dij1i; tmpsd = sj2*dij2i; dumbo = TA+tmpsd*(TB+tmpsd*(TC+tmpsd*(TD+tmpsd*TDD))); sumi -= sj*tmpsd*dij2i*dumbo; } else if (dij > ri + sj) { sumi -= 0.5 * (sj / (r2 - sj2) + 0.5 * dij1i * log((dij - sj) / (dij + sj))); } else if (dij > fabs(ri - sj)) { theta = 0.5 * ri1i * dij1i * (r2 + ri * ri - sj2); uij = 1. / (dij + sj); sumi -= 0.25 * (ri1i * (2. - theta) - uij + dij1i * log(ri * uij)); } else if (ri < sj) { sumi -= 0.5 * (sj / (r2 - sj2) + 2. * ri1i + 0.5 * dij1i * log((sj - dij) / (sj + dij))); } if (gbsa == 1) { if ((P0[i] + P0[j]) > dij) { if (P0[i] > 2.5 && P0[j] > 2.5) { ineighbor[count] = j + 1; /* this +1 is VERY important */ count += 1; } } } } if( gb==1 ){ /* "standard" (HCT) effective radii: */ reff[i] = 1.0/(ri1i + sumi); if( reff[i] < 0.0 ) reff[i] = 30.0; } else { /* "gbao" formulas: */ psi[i] = -ri*sumi; reff[i] = 1.0 / (ri1i - tanh( (gbalpha - gbbeta*psi[i] + gbgamma*psi[i]*psi[i]) *psi[i])/rborn[i] ); } if (gb_debug) printf("%d\t%15.7f\t%15.7f\n", i + 1, rborn[i], reff[i]); if (gbsa == 1) { ineighbor[count] = 0; count = count + 1; } } /* * Main LCPO stuff follows. No parallelization using OpenMP due to * the loop-carried dependence of the 'count variable. */ if (gbsa == 1) { totsasa = 0.0; count = 0; for (i = 0; i < *natom; i++) { if (ineighbor[count] == 0) { count = count + 1; } else { si = PIx4 * P0[i] * P0[i]; sumAij = 0.0; sumAjk = 0.0; sumAjk2 = 0.0; sumAijAjk = 0.0; sumdAijddijdxi = 0.0; sumdAijddijdyi = 0.0; sumdAijddijdzi = 0.0; sumdAijddijdxiAjk = 0.0; sumdAijddijdyiAjk = 0.0; sumdAijddijdziAjk = 0.0; icount = count; L70:j = ineighbor[count] - 1; /* mind the -1 , fortran again */ xi = x[3 * i]; yi = x[3 * i + 1]; zi = x[3 * i + 2]; xj = x[3 * j]; yj = x[3 * j + 1]; zj = x[3 * j + 2]; r2 = (xi - xj) * (xi - xj) + (yi - yj) * (yi - yj) + (zi - zj) * (zi - zj); dij1i = 1. / sqrt(r2); rij = r2 * dij1i; tmpaij = P0[i] - rij * 0.5 - (P0[i] * P0[i] - P0[j] * P0[j]) * 0.5 * dij1i; Aij = PIx2 * P0[i] * tmpaij; dAijddij = PIx1 * P0[i] * (dij1i * dij1i * (P0[i] * P0[i] - P0[j] * P0[j]) - 1.0); dAijddijdxj = dAijddij * (xj - xi) * dij1i; dAijddijdyj = dAijddij * (yj - yi) * dij1i; dAijddijdzj = dAijddij * (zj - zi) * dij1i; sumAij = sumAij + Aij; count2 = icount; sumAjk2 = 0.0; sumdAjkddjkdxj = 0.0; sumdAjkddjkdyj = 0.0; sumdAjkddjkdzj = 0.0; p3p4Aij = -surften * (P3[i] + P4[i] * Aij); L80:k = ineighbor[count2] - 1; /*same as above, -1 ! */ if (j == k) goto L85; xk = x[3 * k]; yk = x[3 * k + 1]; zk = x[3 * k + 2]; rjk2 = (xj - xk) * (xj - xk) + (yj - yk) * (yj - yk) + (zj - zk) * (zj - zk); djk1i = 1.0 / sqrt(rjk2); rjk = rjk2 * djk1i; if (P0[j] + P0[k] > rjk) { vdw2dif = P0[j] * P0[j] - P0[k] * P0[k]; tmpajk = 2.0 * P0[j] - rjk - vdw2dif * djk1i; Ajk = PIx1 * P0[j] * tmpajk; sumAjk = sumAjk + Ajk; sumAjk2 = sumAjk2 + Ajk; dAjkddjk = PIx1 * P0[j] * djk1i * (djk1i * djk1i * vdw2dif - 1.0); dAjkddjkdxj = dAjkddjk * (xj - xk); dAjkddjkdyj = dAjkddjk * (yj - yk); dAjkddjkdzj = dAjkddjk * (zj - zk); f[3 * k] = f[3 * k] + dAjkddjkdxj * p3p4Aij; f[3 * k + 1] = f[3 * k + 1] + dAjkddjkdyj * p3p4Aij; f[3 * k + 2] = f[3 * k + 2] + dAjkddjkdzj * p3p4Aij; sumdAjkddjkdxj = sumdAjkddjkdxj + dAjkddjkdxj; sumdAjkddjkdyj = sumdAjkddjkdyj + dAjkddjkdyj; sumdAjkddjkdzj = sumdAjkddjkdzj + dAjkddjkdzj; } L85:count2 = count2 + 1; if (ineighbor[count2] != 0) { goto L80; } else { count2 = icount; } sumAijAjk = sumAijAjk + Aij * sumAjk2; sumdAijddijdxi = sumdAijddijdxi - dAijddijdxj; sumdAijddijdyi = sumdAijddijdyi - dAijddijdyj; sumdAijddijdzi = sumdAijddijdzi - dAijddijdzj; sumdAijddijdxiAjk = sumdAijddijdxiAjk - dAijddijdxj * sumAjk2; sumdAijddijdyiAjk = sumdAijddijdyiAjk - dAijddijdyj * sumAjk2; sumdAijddijdziAjk = sumdAijddijdziAjk - dAijddijdzj * sumAjk2; lastxj = dAijddijdxj * sumAjk2 + Aij * sumdAjkddjkdxj; lastyj = dAijddijdyj * sumAjk2 + Aij * sumdAjkddjkdyj; lastzj = dAijddijdzj * sumAjk2 + Aij * sumdAjkddjkdzj; dAidxj = surften * (P2[i] * dAijddijdxj + P3[i] * sumdAjkddjkdxj + P4[i] * lastxj); dAidyj = surften * (P2[i] * dAijddijdyj + P3[i] * sumdAjkddjkdyj + P4[i] * lastyj); dAidzj = surften * (P2[i] * dAijddijdzj + P3[i] * sumdAjkddjkdzj + P4[i] * lastzj); f[3 * j] = f[3 * j] + dAidxj; f[3 * j + 1] = f[3 * j + 1] + dAidyj; f[3 * j + 2] = f[3 * j + 2] + dAidzj; count = count + 1; if (ineighbor[count] != 0) { goto L70; } else { count = count + 1; } Ai = P1[i] * si + P2[i] * sumAij + P3[i] * sumAjk + P4[i] * sumAijAjk; dAidxi = surften * (P2[i] * sumdAijddijdxi + P4[i] * sumdAijddijdxiAjk); dAidyi = surften * (P2[i] * sumdAijddijdyi + P4[i] * sumdAijddijdyiAjk); dAidzi = surften * (P2[i] * sumdAijddijdzi + P4[i] * sumdAijddijdziAjk); f[3 * i] = f[3 * i] + dAidxi; f[3 * i + 1] = f[3 * i + 1] + dAidyi; f[3 * i + 2] = f[3 * i + 2] + dAidzi; totsasa = totsasa + Ai; /* printf("totsasa up to %d %f\n",i,totsasa); */ } } /* printf("SASA %f , ESURF %f \n",totsasa,totsasa*surften); */ esurf = totsasa * surften; } /* * Compute the GB, Coulomb and Lennard-Jones energies and derivatives. * When in multi-threaded mode in this nest of loops sum data relative only * To atom i so that OpenMP can be used to parallelize the outer loop. * Perform excluded list management when in single-threaded mode only. */ #ifndef MULTIPLE_THREADS iexcl = 0; #endif #ifdef MULTIPLE_THREADS #pragma omp parallel \ private (i, i3, ri, qi, qj, expmkf, dielfac, qi2h, qid2h, \ iaci, xi, yi, zi, k, j, j3, xij, yij, zij, r2, qiqj, \ rj, rb2, efac, fgbi, fgbk, temp4, temp6, eel, de, temp5, \ rinv, r2inv, ic, r6inv, f6, f12, dedx, dedy, dedz, \ epol, elec, evdw, jexcl, jexcl_last, iexw, xj, yj, zj, \ sumda, thi, ri1i, dij1i, datmp, daix, daiy, daiz, \ dij2i, dij, sj, sj2, temp1, dij3i, tmpsd, dumbo, \ iexcl, iexcl_last, nexcl, npairs) #endif { /* * Allocate and initialize the iexw array used for skipping excluded * atoms. Note that because of the manner in which iexw is used, it * is necessary to initialize it before only the first iteration of * the following loop. */ iexw = ivector(-1, *natom); for (i = -1; i < *natom; i++) { iexw[i] = -1; } #ifdef MULTIPLE_THREADS #pragma omp for schedule (dynamic, 8) #endif /* Loop over all atoms i. */ for (i = 0; i < *natom; i++) { ri = reff[i]; qi = q[i]; /* * If atom i is not frozen, compute the "diagonal" energy that * is a function of only the effective radius Ri but not of the * interatomic distance Dij. Compute also the contribution of * the diagonal energy term to the sum by which the derivative * of Ri will be multiplied. */ if (!frozen[i]) { expmkf = exp(-KSCALE * (*kappa) * ri) / (*diel_ext); dielfac = 1.0 - expmkf; qi2h = 0.5 * qi * qi; qid2h = qi2h * dielfac; sumepol[i] += -qid2h / ri; sumdeijda[i] += qid2h - KSCALE * (*kappa) * qi2h * expmkf *ri; } /* * Skip the pair calculations if there are no atoms j on the * pair list of atom i. */ nexcl = prm->Iblo[i]; npairs = upairs[i]; if ( npairs > 0 ) { i3 = 3 * i; xi = x[i3 ]; yi = x[i3 + 1]; zi = x[i3 + 2]; iaci = prm->Ntypes * (prm->Iac[i] - 1); /* * Expand the excluded atom list into the iexw array by storing i * at array address j. The starting address is either iexcl that * gets updated with each iteration of the i-loop, or Istr[i] that * has been created in the mme_init function to permit parallelization. */ #ifndef MULTIPLE_THREADS jexcl = iexcl; #else jexcl = Istr[i]; #endif jexcl_last = jexcl + nexcl; for (j = jexcl; j < jexcl_last; j++) { iexw[prm->ExclAt[j] - 1] = i; } epol = elec = evdw = 0.0; /* Select atoms j from the pair list. */ icount2 = 0; for (k = uprstr[i]; k < uprstr[i] + npairs; k++) { j = pairlist[k]; j3 = 3 * j; xij = xi - x[j3 ]; yij = yi - x[j3 + 1]; zij = zi - x[j3 + 2]; r2 = xij * xij + yij * yij + zij * zij; rj = reff[j]; rb2 = ri*rj; vfgbi[icount2] = rb2; vefac[icount2] = -0.25*r2/rb2; r2x[icount2] = r2; icount2++; } for (k=0; k < icount2 ; k++) vefac[k] = exp(vefac[k]); /* efac */ for (k=0; k < icount2 ; k++) vrinv[k] = 1./sqrt(r2x[k]); /* inverse distances */ for (k=0; k < icount2 ; k++) vfgbi[k] = vfgbi[k]*vefac[k] + r2x[k]; for (k=0; k < icount2 ; k++) vfgbi[k] = 1./sqrt(vfgbi[k]); /* vfgbi complete */ for (k=0; k < icount2 ; k++) vexpmkf[k] = kkscale/vfgbi[k]; for (k=0; k < icount2 ; k++) vectmp1[k] = 1.0 - vexpmkf[k]; for (k=0; k < icount2 ; k++) vexpmkf[k] = exp(vexpmkf[k]) * invdiel_ext; /* expmkf complete */ for (k=0; k < icount2 ; k++) vectmp1[k] = 1.0 - vexpmkf[k]* vectmp1[k]; for (k=0; k < icount2 ; k++) vectmp1[k] = vectmp1[k]*vfgbi[k]*vfgbi[k]*vfgbi[k]; /* now contains temp6 without 'qiqj' */ icount2 = 0; for (k = uprstr[i]; k < uprstr[i] + npairs; k++) { j = pairlist[k]; j3 = 3 * j; /* Continue computing the non-diagonal energy term. */ xij = xi - x[j3 ]; yij = yi - x[j3 + 1]; zij = zi - x[j3 + 2]; r2 = r2x[icount2]; qiqj = qi * q[j]; rj = reff[j]; rb2 = ri * rj; efac = vefac[icount2]; fgbi = vfgbi[icount2]; fgbk = kkscale / fgbi; expmkf = vexpmkf[icount2]; dielfac = 1.0 - expmkf; epol += -qiqj * dielfac * fgbi; temp6 = qiqj*vectmp1[icount2]; de = temp6 * (1.0 - 0.25 * efac); temp5 = 0.5 * efac * temp6 * (rb2 + 0.25 * r2); /* * Compute the contribution of the non-diagonal energy term to the sum * by which the derivatives of Ri and Rj will be multiplied. When in * single-threaded mode sum the contribution for the derivatives of Ri * and Rj. When in multi-threaded mode sum the contribution for the * derivative of R. */ sumdeijda[i] += ri * temp5; #ifndef MULTIPLE_THREADS sumdeijda[j] += rj * temp5; #endif /* * Compute the Van der Waals and Coulombic energies for only * those pairs that are not on the excluded atom list. Any * pair on the excluded atom list will have atom i stored at * address j of the iexw array. It is not necessary to reset * the elements of the iexw array to -1 between successive * iterations in i because an i,j pair is uniquely identified * by atom i stored at array address j. Thus for example, the * i+1,j pair would be stored at the same address as the i,j * pair but after the i,j pair were used. */ if (iexw[j] != i) { rinv = vrinv[icount2]; r2inv = rinv * rinv; /* gas-phase Coulomb energy: */ eel = qiqj * rinv; elec += eel; de -= eel * r2inv; /* Lennard-Jones energy: */ ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6inv = r2inv * r2inv * r2inv; f6 = prm->Cn2[ic] * r6inv; f12 = prm->Cn1[ic] * r6inv * r6inv; evdw += f12 - f6; de -= (12. * f12 - 6. * f6) * r2inv; } } icount2 ++; /* * When in single-threaded mode sum to the gradient vector the * derivatives of Dij that are computed relative to the cartesian * coordinates of atoms i and j. When in multi-threaded mode sum * to the gradient vector the derivatives that are computed relative * to the cartesian coordinates of atom i. */ dedx = de * xij; dedy = de * yij; dedz = de * zij; f[i3 ] += dedx; f[i3 + 1] += dedy; f[i3 + 2] += dedz; #ifndef MULTIPLE_THREADS f[j3 ] -= dedx; f[j3 + 1] -= dedy; f[j3 + 2] -= dedz; #endif } /* Sum the energy terms. */ sumepol[i] += epol; sumelec[i] += elec; sumevdw[i] += evdw; } /* * When in single-threaded mode update iexcl. This update is unnecessary * in multi-threaded mode because jexcl is loaded from Istr[i] not iexcl. */ #ifndef MULTIPLE_THREADS iexcl += nexcl; #endif } #ifdef MULTIPLE_THREADS /* * Initialize the iexw array used for skipping excluded atoms. * Note that because of the manner in which iexw is used, it * is necessary to initialize it before only the first iteration * of the following loop. */ for (i = -1; i < *natom; i++) { iexw[i] = -1; } /* * When in multi-threaded mode in this nest of loops sum data * relative only to atom j so that OpenMP can be used to parallelize * the outer loop. * * The control flow in this nest of loops must be identical to * that of the preceeding nest of loops. Perform the minimum * computation required to replicate the control flow of the * preceeding nest of loops. */ #pragma omp for schedule (dynamic, 8) /* Loop over all atoms j. */ for (j = 0; j < *natom; j++) { /* * Skip the pair calculations if there are no atoms i on the * pair list of atom j. */ nexcl = Jblo[j]; npairs = lpairs[j]; if ( npairs > 0 ) { j3 = 3 * j; rj = reff[j]; qj = q[j]; xj = x[j3 ]; yj = x[j3 + 1]; zj = x[j3 + 2]; /* * Expand the excluded list into the iexw array by storing j * at array address i. */ iexcl = Jstr[j]; iexcl_last = iexcl + nexcl; for (i = iexcl; i < iexcl_last; i++) { iexw[JExclAt[i] - 1] = j; } /* Select atoms i from the pair list. */ for (k = lprstr[j]; k < lprstr[j] + npairs; k++) { i = pairlist[k]; i3 = 3 * i; iaci = prm->Ntypes * (prm->Iac[i] - 1); xij = x[i3 ] - xj; yij = x[i3 + 1] - yj; zij = x[i3 + 2] - zj; r2 = xij * xij + yij * yij + zij * zij; qiqj = q[i] * qj; ri = reff[i]; rb2 = ri * rj; efac = exp(-r2 / (4.0 * rb2)); fgbi = 1.0 / sqrt(r2 + rb2 * efac); fgbk = -(*kappa) * KSCALE / fgbi; expmkf = exp(fgbk) / (*diel_ext); dielfac = 1.0 - expmkf; epol += -qiqj * dielfac * fgbi; temp4 = fgbi * fgbi * fgbi; temp6 = qiqj * temp4 * (dielfac + fgbk * expmkf); de = temp6 * (1.0 - 0.25 * efac); temp5 = 0.5 * efac * temp6 * (rb2 + 0.25 * r2); /* * Compute the contribution of the non-diagonal energy term to the sum * by which the derivatives of Ri and Rj will be multiplied. When in * single-threaded mode sum the contribution for the derivatives of Ri * and Rj. When in multi-threaded mode sum the contribution for the * derivative of Ri. */ sumdeijda[j] += rj * temp5; /* * Compute the Van der Waals and Coulombic energies for only * those pairs that are not on the excluded atom list. Any * pair on the excluded atom list will have atom j stored at * address i of the iexw array. It is not necessary to reset * the elements of the iexw array to -1 between successive * iterations in j because an i,j pair is uniquely identified * by atom j stored at array address i. Thus for example, the * i,j+1 pair would be stored at the same address as the i,j * pair but after the i,j pair were used. */ if (iexw[i] != j) { rinv = 1. / sqrt(r2); r2inv = rinv * rinv; /* gas-phase Coulomb energy: */ eel = qiqj * rinv; elec += eel; de -= eel * r2inv; /* Lennard-Jones energy: */ ic = prm->Cno[iaci + prm->Iac[j] - 1] - 1; if( ic >= 0 ){ r6inv = r2inv * r2inv * r2inv; f6 = prm->Cn2[ic] * r6inv; f12 = prm->Cn1[ic] * r6inv * r6inv; evdw += f12 - f6; de -= (12. * f12 - 6. * f6) * r2inv; } } /* * Sum to the gradient vector the derivatives that are computed * relative to the cartesian coordinates of atom j. */ f[j3 ] -= de * xij; f[j3 + 1] -= de * yij; f[j3 + 2] -= de * zij; } } } #endif /* Free the iexw array within this potentially parallel region of code. */ free_ivector(iexw, -1, *natom); /* * Compute the derivatives of the effective radius Ri of atom i * with respect to the cartesian coordinates of each atom j. * * When in single-threaded mode sum all of these derivatives into the * gradient vector. When in multi-threaded mode sum into the gradient * vector only the derivatives with respect to the cartesian coordinates * of atom i (which are in fact the sums of all other derivatives). */ #ifdef MULTIPLE_THREADS #pragma omp for #endif /* Loop over all atoms i. */ for (i = 0; i < *natom; i++) { /* * Don't calculate derivatives of the effective radius of atom i * if atom i is frozen or if there are no pair atoms j associated * with atom i. */ npairs = lpairs[i] + upairs[i]; if ( frozen[i] || !npairs ) continue; i3 = 3 * i; xi = x[i3 ]; yi = x[i3 + 1]; zi = x[i3 + 2]; ri = rborn[i] - BOFFSET; ri1i = 1. / ri; sumda = sumdeijda[i]; if ( gb>1 ) { ri = rborn[i] - BOFFSET; thi = tanh( (gbalpha - gbbeta*psi[i] + gbgamma*psi[i]*psi[i])*psi[i] ); sumda *= (gbalpha -2.0*gbbeta*psi[i] + 3.0*gbgamma*psi[i]*psi[i]) *(1.0 - thi*thi)*ri/rborn[i]; } /* Initialize the derivative accumulators. */ daix = daiy = daiz = 0.0; /* Select atom j from the pair list. */ for (k = lprstr[i]; k < lprstr[i] + npairs; k++) { j = pairlist[k]; j3 = 3 * j; xij = xi - x[j3 ]; yij = yi - x[j3 + 1]; zij = zi - x[j3 + 2]; r2 = xij * xij + yij * yij + zij * zij; if ( r2 > rgbmaxpsmax2 ) continue; dij1i = 1.0 / sqrt(r2); dij2i = dij1i * dij1i; dij = r2 * dij1i; sj = fs[j] * (rborn[j] - BOFFSET); sj2 = sj * sj; /* * The following are the numerator of the first derivatives of the * effective radius Ri with respect to the interatomic distance Dij. * They are derived from the equations from the Appendix of Schaefer * and Froemmel as well as from the Taylor series expansion for d>>s * by Andreas Svrcek-Seiler. The smooth rgbmax idea is from Andreas * Svrcek-Seiler and Alexey Onufriev. The complete derivative is * formed by multiplying the numerator by -Ri*Ri. The factor of Ri*Ri * has been moved to the terms that are multiplied by the derivative. * The negation is deferred until later. When the chain rule is used * to form the first derivatives of the effective radius with respect * to the cartesian coordinates, an additional factor of Dij appears * in the denominator. That factor is included in the following * expressions. */ if ( dij > rgbmax + sj ) continue; if( dij > rgbmax - sj ){ temp1 = 1./(dij-sj); dij3i = dij1i * dij2i; datmp = 0.125 * dij3i * ((r2 + sj2) * (temp1*temp1 - rgbmax2i) - 2.0 * log(rgbmax*temp1)); } else if(dij >4.0*sj) { tmpsd = sj2*dij2i; dumbo = TE + tmpsd*(TF+tmpsd*(TG+tmpsd*(TH+tmpsd*THH))); datmp = tmpsd*sj*dij2i*dij2i*dumbo; } else if (dij > ri + sj) { temp1 = 1. / (r2 - sj2); datmp = temp1 * sj * (-0.5 * dij2i + temp1) + 0.25 * dij1i * dij2i * log((dij - sj) / (dij + sj)); } else if (dij > fabs(ri - sj)) { temp1 = 1. / (dij + sj); dij3i = dij1i * dij1i * dij1i; datmp = -0.25 * (-0.5 * (r2 - ri * ri + sj2) * dij3i * ri1i * ri1i + dij1i * temp1 * (temp1 - dij1i) - dij3i * log(ri * temp1)); } else if (ri < sj) { temp1 = 1. / (r2 - sj2); datmp = -0.5 * (sj * dij2i * temp1 - 2. * sj * temp1 * temp1 - 0.5 * dij2i * dij1i * log((sj - dij) / (sj + dij))); } else { datmp = 0.; } /* Sum the derivatives into daix, daiy and daiz. */ daix += xij * datmp; daiy += yij * datmp; daiz += zij * datmp; /* * When in single-threaded mode sum the derivatives relative to * atom j (weighted by -sumdeijda[i]) into the gradient vector. * For example, f[j3 + 2] contains the derivatives of Ri with * respect to the z-coordinate of atom j. */ #ifndef MULTIPLE_THREADS datmp *= sumda; f[j3 ] += xij * datmp; f[j3 + 1] += yij * datmp; f[j3 + 2] += zij * datmp; #endif } /* * Update the gradient vector with the sums of derivatives of the * effective radius Ri with respect to the cartesian coordinates. * For example, f[i3 + 1] contains the sum of derivatives of Ri * with respect to the y-coordinate of each atom. Multiply by * -sumdeijda[i] here (instead of merely using datmp multiplied by * -sumdeijda) in order to distribute the product across the sum of * derivatives in an attempt to obtain greater numeric stability. */ f[i3 ] -= sumda * daix; f[i3 + 1] -= sumda * daiy; f[i3 + 2] -= sumda * daiz; } /* * Compute the derivatives of the effective radius Ri of atom i * with respect to the cartesian coordinates of each atom j, and * sum all of these derivatives into the gradient vector. * * Use the nowait clause for the following 'for' loop because * there are implied barriers at the end of the loop and at the * end of the parallel region; hence, the barrier at the end of * the loop is redundant. */ #ifdef MULTIPLE_THREADS #pragma omp for nowait /* Loop over all atoms j. */ for (j = 0; j < *natom; j++) { /* * Don't calculate derivatives with respect to atom j if there * are no atoms i associated with atom j. */ npairs = lpairs[j] + upairs[j]; if ( !npairs ) continue; j3 = 3 * j; xj = x[j3 + 0]; yj = x[j3 + 1]; zj = x[j3 + 2]; sj = fs[j] * (rborn[j] - BOFFSET); sj2 = sj * sj; /* Select atom i from the pair list. */ for (k = lprstr[j]; k < lprstr[j] + npairs; k++) { i = pairlist[k]; /* * Don't calculate derivatives of the effective radius of atom i * if atom i is frozen. */ if ( frozen[i] ) continue; i3 = 3 * i; xij = x[i3 + 0] - xj; yij = x[i3 + 1] - yj; zij = x[i3 + 2] - zj; r2 = xij * xij + yij * yij + zij * zij; /* Ignore the ij atom pair if their separation exceeds the GB cutoff. */ if ( r2 > rgbmaxpsmax2 ) continue; dij1i = 1.0 / sqrt(r2); dij2i = dij1i * dij1i; dij = r2 * dij1i; ri = rborn[i] - BOFFSET; ri1i = 1. / ri; /* * The following are the numerator of the first derivatives of the * effective radius Ri with respect to the interatomic distance Dij. * They are derived from the equations from the Appendix of Schaefer * and Froemmel as well as from the Taylor series expansion for d>>s * by Andreas Svrcek-Seiler. The smooth rgbmax idea is from Andreas * Svrcek-Seiler and Alexey Onufriev. The complete derivative is * formed by multiplying the numerator by -Ri*Ri. The factor of Ri*Ri * has been moved to the terms that are multiplied by the derivative. * The negation is deferred until later. When the chain rule is used * to form the first derivatives of the effective radius with respect * to the cartesian coordinates, an additional factor of Dij appears * in the denominator. That factor is included in the following * expressions. */ if ( dij > rgbmax + sj ) continue; if( dij > rgbmax - sj ){ temp1 = 1./(dij-sj); dij3i = dij1i * dij2i; datmp = 0.125 * dij3i * ((r2 + sj2) * (temp1*temp1 - rgbmax2i) - 2.0 * log(rgbmax*temp1)); } else if(dij >4.0*sj) { tmpsd = sj2*dij2i; dumbo = TE + tmpsd*(TF+tmpsd*(TG+tmpsd*(TH+tmpsd*THH))); datmp = tmpsd*sj*dij2i*dij2i*dumbo; } else if (dij > ri + sj) { temp1 = 1. / (r2 - sj2); datmp = temp1 * sj * (-0.5 * dij2i + temp1) + 0.25 * dij1i * dij2i * log((dij - sj) / (dij + sj)); } else if (dij > fabs(ri - sj)) { temp1 = 1. / (dij + sj); dij3i = dij1i * dij1i * dij1i; datmp = -0.25 * (-0.5 * (r2 - ri * ri + sj2) * dij3i * ri1i * ri1i + dij1i * temp1 * (temp1 - dij1i) - dij3i * log(ri * temp1)); } else if (ri < sj) { temp1 = 1. / (r2 - sj2); datmp = -0.5 * (sj * dij2i * temp1 - 2. * sj * temp1 * temp1 - 0.5 * dij2i * dij1i * log((sj - dij) / (sj + dij))); } else { datmp = 0.; } /* * Compute the derivatives of the effective radius Ri of atom i * with respect to the cartesian coordinates of atom j. Sum * these derivatives into the gradient vector. * * The derivative of atom i is multiplied by sumdeijda[i] at * this point to minimize the number of multiplies. The * negation from -Ri*Ri is applied at this point. */ sumda = sumdeijda[i]; if ( gb>1 ) { ri = rborn[i] - BOFFSET; thi = tanh( (gbalpha - gbbeta*psi[i] + gbgamma*psi[i]*psi[i])*psi[i] ); sumda *= (gbalpha -2.0*gbbeta*psi[i] + 3.0*gbgamma*psi[i]*psi[i]) *(1.0 - thi*thi)*ri/rborn[i]; } datmp *= -sumda; f[j3 + 0] -= xij * datmp; f[j3 + 1] -= yij * datmp; f[j3 + 2] -= zij * datmp; } } #endif } /* Perform non-logarithmic reduction of the energy terms. */ epol = elec = evdw = 0.0; for (i = 0; i < *natom; i++) { epol += sumepol[i]; elec += sumelec[i]; evdw += sumevdw[i]; } /* Return elec and evdw through the reference parameters eelt and enb. */ *eelt = elec; *enb = evdw; if (gb_debug > 1) { printf("gb epol= %10.3f\n", epol); for (k = 0; k < *natom; k++) { printf("%15.7f\n", sumdeijda[k]); } for (k = 0; k < *natom * 3; k += 3) { printf("%15.7f%15.7f%15.7f\n", f[k], f[k + 1], f[k + 2]); } } free_vector(reff, 0, *natom); free_vector(sumepol, 0, *natom); free_vector(sumelec, 0, *natom); free_vector(sumevdw, 0, *natom); free_vector(sumdeijda, 0, *natom); if( gb == 2 || gb==5 ) free_vector(psi, 0, *natom); free_vector (vfgbi, 0, *natom); free_vector (vefac, 0,*natom); free_vector (r2x, 0, *natom); free_vector (vrinv, 0, *natom); free_vector (vexpmkf, 0, *natom); free_vector (vectmp1, 0, *natom); return (epol); }