/* Copyright (C) 1999, 2000, 2001, 2002, Massachusetts Institute of Technology. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include #include #include #include "../config.h" #include #include "maxwell.h" /**************************************************************************/ /* assign a = v going from transverse to cartesian coordinates. Here, a = (a[0],a[1],a[2]) is in cartesian coordinates. (v[0],v[vstride]) is in the transverse basis of k.m and k.n. */ static void assign_t2c(scalar *a, const k_data k, const scalar *v, int vstride) { scalar v0 = v[0], v1 = v[vstride]; ASSIGN_SCALAR(a[0], SCALAR_RE(v0)*k.mx + SCALAR_RE(v1)*k.nx, SCALAR_IM(v0)*k.mx + SCALAR_IM(v1)*k.nx); ASSIGN_SCALAR(a[1], SCALAR_RE(v0)*k.my + SCALAR_RE(v1)*k.ny, SCALAR_IM(v0)*k.my + SCALAR_IM(v1)*k.ny); ASSIGN_SCALAR(a[2], SCALAR_RE(v0)*k.mz + SCALAR_RE(v1)*k.nz, SCALAR_IM(v0)*k.mz + SCALAR_IM(v1)*k.nz); } /* assign a = k x v (cross product), going from transverse to cartesian coordinates. Here, a = (a[0],a[1],a[2]) and k = (k.kx,k.ky,k.kz) are in cartesian coordinates. (v[0],v[vstride]) is in the transverse basis of k.m and k.n. */ static void assign_cross_t2c(scalar *a, const k_data k, const scalar *v, int vstride) { scalar v0 = v[0], v1 = v[vstride]; /* Note that k x m = |k| n, k x n = - |k| m. Therefore, k x v = k x (v0 m + v1 n) = (v0 n - v1 m) * |k|. */ ASSIGN_SCALAR(a[0], (SCALAR_RE(v0)*k.nx - SCALAR_RE(v1)*k.mx) * k.kmag, (SCALAR_IM(v0)*k.nx - SCALAR_IM(v1)*k.mx) * k.kmag); ASSIGN_SCALAR(a[1], (SCALAR_RE(v0)*k.ny - SCALAR_RE(v1)*k.my) * k.kmag, (SCALAR_IM(v0)*k.ny - SCALAR_IM(v1)*k.my) * k.kmag); ASSIGN_SCALAR(a[2], (SCALAR_RE(v0)*k.nz - SCALAR_RE(v1)*k.mz) * k.kmag, (SCALAR_IM(v0)*k.nz - SCALAR_IM(v1)*k.mz) * k.kmag); #ifdef DEBUG { real num; num = SCALAR_NORMSQR(a[0])+SCALAR_NORMSQR(a[1])+SCALAR_NORMSQR(a[2]); CHECK(!BADNUM(num), "yikes, crazy number!"); } #endif } /* assign v = scale * k x a (cross product), going from cartesian to transverse coordinates. Here, a = (a[0],a[1],a[2]) and k = (k.kx,k.ky,k.kz) are in cartesian coordinates. (v[0],v[vstride]) is in the transverse basis of k.m and k.n. */ static void assign_cross_c2t(scalar *v, int vstride, const k_data k, const scalar *a, real scale) { scalar a0 = a[0], a1 = a[1], a2 = a[2]; scalar at0, at1; /* First, compute at0 = a*m and at1 = a*n. (Components of a that are parallel to k are killed anyway by the cross product.) */ ASSIGN_SCALAR(at0, SCALAR_RE(a0)*k.mx + SCALAR_RE(a1)*k.my + SCALAR_RE(a2)*k.mz, SCALAR_IM(a0)*k.mx + SCALAR_IM(a1)*k.my + SCALAR_IM(a2)*k.mz); ASSIGN_SCALAR(at1, SCALAR_RE(a0)*k.nx + SCALAR_RE(a1)*k.ny + SCALAR_RE(a2)*k.nz, SCALAR_IM(a0)*k.nx + SCALAR_IM(a1)*k.ny + SCALAR_IM(a2)*k.nz); /* Now, k x a = k x (at0*m + at1*n) = (at0*n - at1*m) * |k|. */ scale *= k.kmag; /* combine scale factor and |k|*/ ASSIGN_SCALAR(v[0], - scale * SCALAR_RE(at1), - scale * SCALAR_IM(at1)); ASSIGN_SCALAR(v[vstride], scale * SCALAR_RE(at0), scale * SCALAR_IM(at0)); #ifdef DEBUG { real dummy = SCALAR_NORMSQR(v[0]) + SCALAR_NORMSQR(v[vstride]); CHECK(!BADNUM(dummy), "yikes, crazy number!"); } #endif } /* compute a = u x v, where a and u are in cartesian coordinates and v is in transverse coordinates. */ static void assign_ucross_t2c(scalar *a, const real u[3], const k_data k, const scalar *v, int vstride) { scalar v0 = v[0], v1 = v[vstride]; real vx_r, vy_r, vz_r; #ifdef SCALAR_COMPLEX real vx_i, vy_i, vz_i; #endif /* Note that v = (vx,vy,vz) = (v0 m + v1 n). */ vx_r = SCALAR_RE(v0)*k.mx + SCALAR_RE(v1)*k.nx; vy_r = SCALAR_RE(v0)*k.my + SCALAR_RE(v1)*k.ny; vz_r = SCALAR_RE(v0)*k.mz + SCALAR_RE(v1)*k.nz; #ifdef SCALAR_COMPLEX vx_i = SCALAR_IM(v0)*k.mx + SCALAR_IM(v1)*k.nx; vy_i = SCALAR_IM(v0)*k.my + SCALAR_IM(v1)*k.ny; vz_i = SCALAR_IM(v0)*k.mz + SCALAR_IM(v1)*k.nz; #endif ASSIGN_SCALAR(a[0], u[1] * vz_r - u[2] * vy_r, u[1] * vz_i - u[2] * vy_i); ASSIGN_SCALAR(a[1], u[2] * vx_r - u[0] * vz_r, u[2] * vx_i - u[0] * vz_i); ASSIGN_SCALAR(a[2], u[0] * vy_r - u[1] * vx_r, u[0] * vy_i - u[1] * vx_i); } /**************************************************************************/ void maxwell_compute_fft(int dir, maxwell_data *d, scalar *array, int howmany, int stride, int dist) { #ifdef HAVE_FFTW # ifdef SCALAR_COMPLEX # ifndef HAVE_MPI fftwnd(dir < 0 ? d->plan : d->iplan, howmany, (fftw_complex *) array, stride, dist, 0, 0, 0); # else /* HAVE_MPI */ CHECK(stride == howmany && dist == 1, "weird strides and dists don't work with fftwnd_mpi"); fftwnd_mpi(dir < 0 ? d->plan : d->iplan, howmany, (fftw_complex *) array, (fftw_complex *) NULL, FFTW_TRANSPOSED_ORDER); # endif /* HAVE_MPI */ # else /* not SCALAR_COMPLEX */ # ifndef HAVE_MPI if (dir > 0) rfftwnd_real_to_complex(d->iplan, howmany, (fftw_real *) array, stride, dist, 0, 0, 0); else rfftwnd_complex_to_real(d->plan, howmany, (fftw_complex *) array, stride, dist, 0, 0, 0); # else /* HAVE_MPI */ CHECK(stride == howmany && dist == 1, "weird strides and dists don't work with rfftwnd_mpi"); rfftwnd_mpi(dir < 0 ? d->plan : d->iplan, howmany, array, (scalar *) NULL, FFTW_TRANSPOSED_ORDER); # endif /* HAVE_MPI */ # endif /* not SCALAR_COMPLEX */ #else /* not HAVE_FFTW */ # error only FFTW ffts are supported #endif /* not HAVE_FFTW */ } /**************************************************************************/ /* assigns newv = matrix * oldv. matrix is symmetric and so is stored in "packed" format. */ void assign_symmatrix_vector(scalar_complex *newv, const symmetric_matrix matrix, const scalar_complex *oldv) { scalar_complex v0 = oldv[0], v1 = oldv[1], v2 = oldv[2]; #if defined(WITH_HERMITIAN_EPSILON) newv[0].re = matrix.m00 * v0.re; newv[0].im = matrix.m00 * v0.im; CACCUMULATE_SUM_MULT(newv[0], matrix.m01, v1); CACCUMULATE_SUM_MULT(newv[0], matrix.m02, v2); newv[1].re = matrix.m11 * v1.re; newv[1].im = matrix.m11 * v1.im; CACCUMULATE_SUM_CONJ_MULT(newv[1], matrix.m01, v0); CACCUMULATE_SUM_MULT(newv[1], matrix.m12, v2); newv[2].re = matrix.m22 * v2.re; newv[2].im = matrix.m22 * v2.im; CACCUMULATE_SUM_CONJ_MULT(newv[2], matrix.m02, v0); CACCUMULATE_SUM_CONJ_MULT(newv[2], matrix.m12, v1); #else newv[0].re = matrix.m00 * v0.re + matrix.m01 * v1.re + matrix.m02 * v2.re; newv[0].im = matrix.m00 * v0.im + matrix.m01 * v1.im + matrix.m02 * v2.im; newv[1].re = matrix.m01 * v0.re + matrix.m11 * v1.re + matrix.m12 * v2.re; newv[1].im = matrix.m01 * v0.im + matrix.m11 * v1.im + matrix.m12 * v2.im; newv[2].re = matrix.m02 * v0.re + matrix.m12 * v1.re + matrix.m22 * v2.re; newv[2].im = matrix.m02 * v0.im + matrix.m12 * v1.im + matrix.m22 * v2.im; #endif #ifdef DEBUG { real dummy; dummy = CSCALAR_NORMSQR(newv[0]) + CSCALAR_NORMSQR(newv[1]) + CSCALAR_NORMSQR(newv[2]); CHECK(!BADNUM(dummy), "yikes, crazy number!"); } #endif } /* compute the D field in position space from Hin, which holds the H field in Fourier space, for the specified bands; this amounts to taking the curl and then Fourier transforming. The output array, dfield, is fft_output_size x cur_num_bands x 3, where fft_output_size is the local spatial indices and 3 is the field components. Note: actually, this computes just (k+G) x H, whereas the actual D field is i/omega i(k+G) x H...so, we are really computing -omega*D, here. */ void maxwell_compute_d_from_H(maxwell_data *d, evectmatrix Hin, scalar_complex *dfield, int cur_band_start, int cur_num_bands) { scalar *fft_data = (scalar *) dfield; int i, j, b; CHECK(Hin.c == 2, "fields don't have 2 components!"); CHECK(d, "null maxwell data pointer!"); CHECK(dfield, "null field output data!"); CHECK(cur_band_start >= 0 && cur_band_start + cur_num_bands <= Hin.p, "invalid range of bands for computing fields"); /* first, compute fft_data = curl(Hin) (really (k+G) x H) : */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; for (b = 0; b < cur_num_bands; ++b) assign_cross_t2c(&fft_data[3 * (ij2*cur_num_bands + b)], cur_k, &Hin.data[ij * 2 * Hin.p + b + cur_band_start], Hin.p); } /* now, convert to position space via FFT: */ maxwell_compute_fft(+1, d, fft_data, cur_num_bands*3, cur_num_bands*3, 1); } /* Compute E (output in dfield) from D (input in dfield); this amounts to just dividing by the dielectric tensor. dfield is in position space and corresponds to the output from maxwell_compute_d_from_H, above. */ void maxwell_compute_e_from_d(maxwell_data *d, scalar_complex *dfield, int cur_num_bands) { int i, b; CHECK(d, "null maxwell data pointer!"); CHECK(dfield, "null field input/output data!"); for (i = 0; i < d->fft_output_size; ++i) { symmetric_matrix eps_inv = d->eps_inv[i]; for (b = 0; b < cur_num_bands; ++b) { int ib = 3 * (i * cur_num_bands + b); assign_symmatrix_vector(&dfield[ib], eps_inv, &dfield[ib]); } } } /* Compute the magnetic (H) field in Fourier space from the electric field (e) in position space; this amouns to Fourier transforming and then taking the curl. Also, multiply by scale. Other parameters are as in compute_d_from_H. Note: we actually compute (k+G) x E, whereas the actual H field is -i/omega i(k+G) x E...so, we are actually computing omega*H, here. */ void maxwell_compute_H_from_e(maxwell_data *d, evectmatrix Hout, scalar_complex *efield, int cur_band_start, int cur_num_bands, real scale) { scalar *fft_data = (scalar *) efield; int i, j, b; CHECK(Hout.c == 2, "fields don't have 2 components!"); CHECK(d, "null maxwell data pointer!"); CHECK(efield, "null field output data!"); CHECK(cur_band_start >= 0 && cur_band_start + cur_num_bands <= Hout.p, "invalid range of bands for computing fields"); /* convert back to Fourier space */ maxwell_compute_fft(-1, d, fft_data, cur_num_bands*3, cur_num_bands*3, 1); /* then, compute Hout = curl(fft_data) (* scale factor): */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; for (b = 0; b < cur_num_bands; ++b) assign_cross_c2t(&Hout.data[ij * 2 * Hout.p + b + cur_band_start], Hout.p, cur_k, &fft_data[3 * (ij2*cur_num_bands + b)], scale); } } /* Compute H field in position space from Hin. Parameters and output formats are the same as for compute_d_from_H, above. */ void maxwell_compute_h_from_H(maxwell_data *d, evectmatrix Hin, scalar_complex *hfield, int cur_band_start, int cur_num_bands) { scalar *fft_data = (scalar *) hfield; int i, j, b; CHECK(Hin.c == 2, "fields don't have 2 components!"); CHECK(d, "null maxwell data pointer!"); CHECK(hfield, "null field output data!"); CHECK(cur_band_start >= 0 && cur_band_start + cur_num_bands <= Hin.p, "invalid range of bands for computing fields"); /* first, compute fft_data = Hin, with the vector field converted from transverse to cartesian basis: */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; for (b = 0; b < cur_num_bands; ++b) assign_t2c(&fft_data[3 * (ij2*cur_num_bands + b)], cur_k, &Hin.data[ij * 2 * Hin.p + b + cur_band_start], Hin.p); } /* now, convert to position space via FFT: */ maxwell_compute_fft(+1, d, fft_data, cur_num_bands*3, cur_num_bands*3, 1); } /**************************************************************************/ /* The following functions take a complex or real vector field in position space, as output by rfftwnd (or rfftwnd_mpi), and compute the "other half" of the array. That is, rfftwnd outputs only half of the logical FFT output, since the other half is redundant (see the FFTW manual). This is fine for computation, but for visualization/output we want the whole array, redundant or not. So, we output the array in two stages, first outputting the array we are given, then using the functions below to compute the other half and outputting that. Given an array A(i,j,k), the redundant half is given by the following identity for transforms of real data: A(nx-i,ny-j,nz-k) = A(i,j,k)* where nx-i/ny-j/nz-k are interpreted modulo nx/ny/nz. (This means that zero coordinates are handled specially: nx-0 = 0.) Note that actually, the other "half" is actually slightly less than half of the array. Note also that the other half, in the case of distributed MPI transforms, is not necessarily contiguous, due to special handling of zero coordinates. There is an additional complication. The array with the symmetry above may have been multiplied by exp(ikx) phases to get its Bloch state. In this case, we must use the identity: A(R-x)*exp(ik(R-x)) = ( A(x) exp(ikx) exp(-ikR) )* where R is a lattice vector. That is, we not only must conjugate and reverse the order, but we also may need to multiply by exp(-ikR) before conjugating. Unfortunately, R depends upon where we are in the array, because of the fact that the indices are interpreted modulo n (i.e. the zero indices aren't reordered). e.g. for the point (nx-i,ny-j,nz-k), R = Rx*(i!=0) + Ry*(j!=0) + Rz*(k!=0). Another complication is that, for 2d rfftwnd_mpi transforms, the truncated dimension (in the transformed, transposed array) is the *first* dimension rather than the last. This code is a little too subtle for my tastes; real FFTs are a pain. */ #define TWOPI 6.2831853071795864769252867665590057683943388 /* This function takes a complex vector field and replaces it with its other "half." phase{x,y,z} is the phase k*R{x,y,z}, in "units" of 2*pi. (Equivalently, phase{x,y,z} is the k vector in the reciprocal lattice basis.) */ void maxwell_vectorfield_otherhalf(maxwell_data *d, scalar_complex *field, real phasex, real phasey, real phasez) { #ifndef SCALAR_COMPLEX int i, j, jmin = 1; int rank, n_other, n_last, n_last_stored, n_last_new, nx, ny, nz, nxmax; # ifdef HAVE_MPI int local_x_start; # endif scalar_complex pz, pxz, pyz, pxyz; nxmax = nx = d->nx; ny = d->ny; nz = d->nz; n_other = d->other_dims; n_last = d->last_dim; n_last_stored = d->last_dim_size / 2; n_last_new = n_last - n_last_stored; /* < n_last_stored always */ rank = (nz == 1) ? (ny == 1 ? 1 : 2) : 3; # ifdef HAVE_MPI local_x_start = d->local_y_start; CHECK(rank == 2 || rank == 3, "unsupported rfftwnd_mpi rank!"); if (rank == 2) { n_other = nx; n_last_new = ny = d->local_ny; if (local_x_start == 0) --n_last_new; /* DC frequency should not be in other half */ else jmin = 0; if (local_x_start + ny == n_last_stored && n_last % 2 == 0) --n_last_new; /* Nyquist freq. should not be in other half */ n_last_stored = ny; } else { /* rank == 3 */ ny = nx; nx = d->local_ny; nxmax = local_x_start ? nx - 1 : nx; n_other = nx * ny; } # endif /* HAVE_MPI */ /* compute p = exp(i*phase) factors: */ phasex *= -TWOPI; phasey *= -TWOPI; phasez *= -TWOPI; switch (rank) { /* treat z as the last/truncated dimension always */ case 3: break; # if defined(HAVE_MPI) && ! defined(SCALAR_COMPLEX) case 2: phasez = phasex; phasex = phasey; phasey = 0; break; # else case 2: phasez = phasey; phasey = 0; break; # endif case 1: phasez = phasex; phasex = phasey = 0; break; } CASSIGN_SCALAR(pz, cos(phasez), sin(phasez)); phasex += phasez; CASSIGN_SCALAR(pxz, cos(phasex), sin(phasex)); phasex += phasey; CASSIGN_SCALAR(pxyz, cos(phasex), sin(phasex)); phasey += phasez; CASSIGN_SCALAR(pyz, cos(phasey), sin(phasey)); /* convenience macros to copy vectors, vectors times phases, and conjugated vectors: */ # define ASSIGN_V(f,k,f2,k2) { f[3*(k)+0] = f2[3*(k2)+0]; \ f[3*(k)+1] = f2[3*(k2)+1]; \ f[3*(k)+2] = f2[3*(k2)+2]; } # define ASSIGN_VP(f,k,f2,k2,p) { CASSIGN_MULT(f[3*(k)+0], f2[3*(k2)+0], p); \ CASSIGN_MULT(f[3*(k)+1], f2[3*(k2)+1], p); \ CASSIGN_MULT(f[3*(k)+2], f2[3*(k2)+2], p); } # define ASSIGN_CV(f,k,f2,k2) { CASSIGN_CONJ(f[3*(k)+0], f2[3*(k2)+0]); \ CASSIGN_CONJ(f[3*(k)+1], f2[3*(k2)+1]); \ CASSIGN_CONJ(f[3*(k)+2], f2[3*(k2)+2]); } /* First, swap the order of elements and multiply by exp(ikR) phase factors. We have to be careful here not to double-swap any element pair; this is prevented by never swapping with a "conjugated" point that is earlier in the array. */ if (rank == 3) { int ix, iy; for (ix = 0; 2*ix <= nxmax; ++ix) { int xdiff, ixc; # ifdef HAVE_MPI if (local_x_start == 0) { xdiff = ix != 0; ixc = (nx - ix) % nx; } else { xdiff = 1; ixc = nx-1 - ix; } # else xdiff = ix != 0; ixc = (nx - ix) % nx; # endif for (iy = 0; iy < ny; ++iy) { int ydiff = iy != 0; int i = ix * ny + iy, ic = ixc * ny + (ny - iy) % ny, jmax; if (ic < i) continue; jmax = n_last_new; if (ic == i) jmax = (jmax + 1) / 2; for (j = 1; j <= jmax; ++j) { int jc = n_last_new + 1 - j; int ij = i*n_last_stored + j; int ijc = ic*n_last_stored + jc; scalar_complex f_tmp[3]; switch (xdiff*2 + ydiff) { /* pick exp(-ikR) phase */ case 3: /* xdiff && ydiff */ ASSIGN_VP(f_tmp, 0, field, ijc, pxyz); ASSIGN_VP(field, ijc, field, ij, pxyz); ASSIGN_V(field, ij, f_tmp, 0); break; case 2: /* xdiff && !ydiff */ ASSIGN_VP(f_tmp, 0, field, ijc, pxz); ASSIGN_VP(field, ijc, field, ij, pxz); ASSIGN_V(field, ij, f_tmp, 0); break; case 1: /* !xdiff && ydiff */ ASSIGN_VP(f_tmp, 0, field, ijc, pyz); ASSIGN_VP(field, ijc, field, ij, pyz); ASSIGN_V(field, ij, f_tmp, 0); break; case 0: /* !xdiff && !ydiff */ ASSIGN_VP(f_tmp, 0, field, ijc, pz); ASSIGN_VP(field, ijc, field, ij, pz); ASSIGN_V(field, ij, f_tmp, 0); break; } } } } /* Next, conjugate, and remove the holes from the array corresponding to the DC and Nyquist frequencies (which were in the first half already): */ for (i = 0; i < n_other; ++i) for (j = 1; j < n_last_new + 1; ++j) { int ij = i*n_last_stored + j, ijnew = i*n_last_new + j-1; ASSIGN_CV(field, ijnew, field, ij); } } else /* if (rank <= 2) */ { int i; if (rank == 1) /* (note that 1d MPI transforms are not allowed) */ nx = 1; /* x dimension is handled by j (last dimension) loop */ # ifdef HAVE_MPI for (i = 0; i < nx; ++i) # else for (i = 0; 2*i <= nx; ++i) # endif { int xdiff = i != 0, ic = (nx - i) % nx; int jmax = n_last_new + (jmin - 1); # ifndef HAVE_MPI if (ic == i) jmax = (jmax + 1) / 2; # endif for (j = jmin; j <= jmax; ++j) { scalar_complex f_tmp[3]; # ifdef HAVE_MPI int jc = jmax + jmin - j; int ij = j * nx + i; int ijc = jc * nx + ic; if (ijc < ij) break; # else /* ! HAVE_MPI */ int jc = n_last_new + 1 - j; int ij = i*n_last_stored + j; int ijc = ic*n_last_stored + jc; # endif /* ! HAVE_MPI */ if (xdiff) { ASSIGN_VP(f_tmp, 0, field, ijc, pxz); ASSIGN_VP(field, ijc, field, ij, pxz); ASSIGN_V(field, ij, f_tmp, 0); } else { ASSIGN_VP(f_tmp, 0, field, ijc, pz); ASSIGN_VP(field, ijc, field, ij, pz); ASSIGN_V(field, ij, f_tmp, 0); } } } /* Next, conjugate, and remove the holes from the array corresponding to the DC and Nyquist frequencies (which were in the first half already): */ for (i = 0; i < nx; ++i) for (j = jmin; j < n_last_new + jmin; ++j) { # ifdef HAVE_MPI int ij = j*nx + i, ijnew = (j-jmin)*nx + i; # else int ij = i*n_last_stored + j, ijnew = i*n_last_new + j-1; # endif ASSIGN_CV(field, ijnew, field, ij); } } # undef ASSIGN_V # undef ASSIGN_VP # undef ASSIGN_CV #endif /* ! SCALAR_COMPLEX */ } /* Similar to vectorfield_otherhalf, above, except that it operates on a real scalar field, which is assumed to have come from e.g. the absolute values of a complex field (and thus no phase factors or conjugations are necessary). */ void maxwell_scalarfield_otherhalf(maxwell_data *d, real *field) { #ifndef SCALAR_COMPLEX int i, j, jmin = 1; int rank, n_other, n_last, n_last_stored, n_last_new, nx, ny, nz, nxmax; # ifdef HAVE_MPI int local_x_start; # endif nxmax = nx = d->nx; ny = d->ny; nz = d->nz; n_other = d->other_dims; n_last = d->last_dim; n_last_stored = d->last_dim_size / 2; n_last_new = n_last - n_last_stored; /* < n_last_stored always */ rank = (nz == 1) ? (ny == 1 ? 1 : 2) : 3; # ifdef HAVE_MPI local_x_start = d->local_y_start; CHECK(rank == 2 || rank == 3, "unsupported rfftwnd_mpi rank!"); if (rank == 2) { n_other = nx; n_last_new = ny = d->local_ny; if (local_x_start == 0) --n_last_new; /* DC frequency should not be in other half */ else jmin = 0; if (local_x_start + ny == n_last_stored && n_last % 2 == 0) --n_last_new; /* Nyquist freq. should not be in other half */ n_last_stored = ny; } else { /* rank == 3 */ ny = nx; nx = d->local_ny; nxmax = local_x_start ? nx - 1 : nx; n_other = nx * ny; } # endif /* HAVE_MPI */ /* First, swap the order of elements and multiply by exp(ikR) phase factors. We have to be careful here not to double-swap any element pair; this is prevented by never swapping with a "conjugated" point that is earlier in the array. */ if (rank == 3) { int ix, iy; for (ix = 0; 2*ix <= nxmax; ++ix) { int ixc; # ifdef HAVE_MPI if (local_x_start == 0) ixc = (nx - ix) % nx; else ixc = nx-1 - ix; # else ixc = (nx - ix) % nx; # endif for (iy = 0; iy < ny; ++iy) { int i = ix * ny + iy, ic = ixc * ny + (ny - iy) % ny, jmax; if (ic < i) continue; jmax = n_last_new; if (ic == i) jmax = (jmax + 1) / 2; for (j = 1; j <= jmax; ++j) { int jc = n_last_new + 1 - j; int ij = i*n_last_stored + j; int ijc = ic*n_last_stored + jc; real f_tmp; f_tmp = field[ijc]; field[ijc] = field[ij]; field[ij] = f_tmp; } } } /* Next, conjugate, and remove the holes from the array corresponding to the DC and Nyquist frequencies (which were in the first half already): */ for (i = 0; i < n_other; ++i) for (j = 1; j < n_last_new + 1; ++j) { int ij = i*n_last_stored + j, ijnew = i*n_last_new + j-1; field[ijnew] = field[ij]; } } else /* if (rank <= 2) */ { int i; if (rank == 1) /* (note that 1d MPI transforms are not allowed) */ nx = 1; /* x dimension is handled by j (last dimension) loop */ # ifdef HAVE_MPI for (i = 0; i < nx; ++i) # else for (i = 0; 2*i <= nx; ++i) # endif { int ic = (nx - i) % nx; int jmax = n_last_new + (jmin - 1); # ifndef HAVE_MPI if (ic == i) jmax = (jmax + 1) / 2; # endif for (j = jmin; j <= jmax; ++j) { real f_tmp; # ifdef HAVE_MPI int jc = jmax + jmin - j; int ij = j * nx + i; int ijc = jc * nx + ic; if (ijc < ij) break; # else /* ! HAVE_MPI */ int jc = n_last_new + 1 - j; int ij = i*n_last_stored + j; int ijc = ic*n_last_stored + jc; # endif /* ! HAVE_MPI */ f_tmp = field[ijc]; field[ijc] = field[ij]; field[ij] = f_tmp; } } /* Next, remove the holes from the array corresponding to the DC and Nyquist frequencies (which were in the first half already): */ for (i = 0; i < nx; ++i) for (j = jmin; j < n_last_new + jmin; ++j) { # ifdef HAVE_MPI int ij = j*nx + i, ijnew = (j-jmin)*nx + i; # else int ij = i*n_last_stored + j, ijnew = i*n_last_new + j-1; # endif field[ijnew] = field[ij]; } } #endif /* ! SCALAR_COMPLEX */ } /**************************************************************************/ #define MIN2(a,b) ((a) < (b) ? (a) : (b)) /* Compute Xout = curl(1/epsilon * curl(Xin)) */ void maxwell_operator(evectmatrix Xin, evectmatrix Xout, void *data, int is_current_eigenvector, evectmatrix Work) { maxwell_data *d = (maxwell_data *) data; int cur_band_start; scalar_complex *cdata; real scale; CHECK(d, "null maxwell data pointer!"); CHECK(Xin.c == 2, "fields don't have 2 components!"); (void) is_current_eigenvector; /* unused */ (void) Work; cdata = (scalar_complex *) d->fft_data; scale = -1.0 / Xout.N; /* scale factor to normalize FFT; negative sign comes from 2 i's from curls */ /* compute the operator, num_fft_bands at a time: */ for (cur_band_start = 0; cur_band_start < Xin.p; cur_band_start += d->num_fft_bands) { int cur_num_bands = MIN2(d->num_fft_bands, Xin.p - cur_band_start); maxwell_compute_d_from_H(d, Xin, cdata, cur_band_start, cur_num_bands); maxwell_compute_e_from_d(d, cdata, cur_num_bands); maxwell_compute_H_from_e(d, Xout, cdata, cur_band_start, cur_num_bands, scale); } } /* Compute the operation Xout = (M - w^2) Xin, where M is the Maxwell operator and w is the target frequency. This shifts the eigenvalue spectrum so that the smallest magnitude eigenvalues are those nearest to w. However, there are negative eigenvalues (the operator is not positive-definite), and the smallest eigenvectors (not taking the absolute value) are the same as those of M. */ void maxwell_target_operator1(evectmatrix Xin, evectmatrix Xout, void *data, int is_current_eigenvector, evectmatrix Work) { maxwell_target_data *d = (maxwell_target_data *) data; real omega_sqr = d->target_frequency * d->target_frequency; maxwell_operator(Xin, Xout, d->d, is_current_eigenvector, Work); evectmatrix_aXpbY(1.0, Xout, -omega_sqr, Xin); } /* Compute the operation Xout = (M - w^2)^2 Xin, where M is the Maxwell operator and w is the target frequency. This shifts the eigenvalue spectrum so that the smallest eigenvalues are those nearest to w. */ void maxwell_target_operator(evectmatrix Xin, evectmatrix Xout, void *data, int is_current_eigenvector, evectmatrix Work) { if (Xin.n != 0) CHECK(Work.data && Work.data != Xin.data && Work.data != Xout.data, "maxwell_target_operator must have distinct workspace!"); maxwell_target_operator1(Xin, Work, data, is_current_eigenvector, Xout); /* N.B. maxwell_operator(), and thus maxwell_target_operator1(), doesn't actually need the workspace, so we can safely pass Work here for the scratch parameter: */ maxwell_target_operator1(Work, Xout, data, is_current_eigenvector, Work); } /* Compute the operation Xout = curl 1/epsilon * i u x Xin, which is useful operation in computing the group velocity (derivative of the maxwell operator). u is a vector in cartesian coordinates. */ void maxwell_ucross_op(evectmatrix Xin, evectmatrix Xout, maxwell_data *d, const real u[3]) { scalar *fft_data; scalar_complex *cdata; real scale; int cur_band_start; int i, j, b; CHECK(d, "null maxwell data pointer!"); CHECK(Xin.c == 2, "fields don't have 2 components!"); cdata = (scalar_complex *) (fft_data = d->fft_data); scale = -1.0 / Xout.N; /* scale factor to normalize FFT; negative sign comes from 2 i's from curls */ /* compute the operator, num_fft_bands at a time: */ for (cur_band_start = 0; cur_band_start < Xin.p; cur_band_start += d->num_fft_bands) { int cur_num_bands = MIN2(d->num_fft_bands, Xin.p - cur_band_start); /* first, compute fft_data = u x Xin: */ for (i = 0; i < d->other_dims; ++i) for (j = 0; j < d->last_dim; ++j) { int ij = i * d->last_dim + j; int ij2 = i * d->last_dim_size + j; k_data cur_k = d->k_plus_G[ij]; for (b = 0; b < cur_num_bands; ++b) assign_ucross_t2c(&fft_data[3 * (ij2*cur_num_bands + b)], u, cur_k, &Xin.data[ij * 2 * Xin.p + b + cur_band_start], Xin.p); } /* now, convert to position space via FFT: */ maxwell_compute_fft(+1, d, fft_data, cur_num_bands*3, cur_num_bands*3, 1); maxwell_compute_e_from_d(d, cdata, cur_num_bands); maxwell_compute_H_from_e(d, Xout, cdata, cur_band_start, cur_num_bands, scale); } }