/*
* Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include "rfftw_mpi.h"
/***************************** Plan Creation ****************************/
rfftwnd_mpi_plan rfftwnd_mpi_create_plan(MPI_Comm comm,
int rank, const int *n,
fftw_direction dir,
int flags)
{
rfftwnd_mpi_plan p;
if (rank < 2)
return 0;
p = (rfftwnd_mpi_plan) fftw_malloc(sizeof(rfftwnd_mpi_plan_data));
p->p_fft_x = 0;
p->p_fft = 0;
p->p_transpose = 0;
p->p_transpose_inv = 0;
p->work = 0;
p->p_fft_x = fftw_create_plan(n[0], dir, flags | FFTW_IN_PLACE);
p->p_fft = rfftwnd_create_plan(rank-1, n+1, dir, flags | FFTW_IN_PLACE);
if (!p->p_fft)
rfftwnd_mpi_destroy_plan(p);
p->p_transpose = transpose_mpi_create_plan(n[0], p->p_fft->n[0], comm);
if (!p->p_transpose)
rfftwnd_mpi_destroy_plan(p);
p->p_transpose_inv = transpose_mpi_create_plan(p->p_fft->n[0], n[0], comm);
if (!p->p_transpose_inv)
rfftwnd_mpi_destroy_plan(p);
if (n[0] > p->p_fft->nwork)
p->work = (fftw_complex *) fftw_malloc(n[0] * sizeof(fftw_complex));
return p;
}
rfftwnd_mpi_plan rfftw2d_mpi_create_plan(MPI_Comm comm,
int nx, int ny,
fftw_direction dir, int flags)
{
int n[2];
n[0] = nx;
n[1] = ny;
return rfftwnd_mpi_create_plan(comm, 2, n, dir, flags);
}
rfftwnd_mpi_plan rfftw3d_mpi_create_plan(MPI_Comm comm,
int nx, int ny, int nz,
fftw_direction dir, int flags)
{
int n[3];
n[0] = nx;
n[1] = ny;
n[2] = nz;
return rfftwnd_mpi_create_plan(comm, 3, n, dir, flags);
}
/********************** Plan Destruction ************************/
void rfftwnd_mpi_destroy_plan(rfftwnd_mpi_plan p)
{
if (p) {
if (p->p_fft_x)
fftw_destroy_plan(p->p_fft_x);
if (p->p_fft)
rfftwnd_destroy_plan(p->p_fft);
if (p->p_transpose)
transpose_mpi_destroy_plan(p->p_transpose);
if (p->p_transpose_inv)
transpose_mpi_destroy_plan(p->p_transpose_inv);
if (p->work)
fftw_free(p->work);
fftw_free(p);
}
}
/********************* Getting Local Size ***********************/
void rfftwnd_mpi_local_sizes(rfftwnd_mpi_plan p,
int *local_nx,
int *local_x_start,
int *local_ny_after_transpose,
int *local_y_start_after_transpose,
int *total_local_size)
{
if (p) {
transpose_mpi_get_local_size(p->p_transpose->nx,
p->p_transpose->my_pe,
p->p_transpose->n_pes,
local_nx,
local_x_start);
transpose_mpi_get_local_size(p->p_transpose->ny,
p->p_transpose->my_pe,
p->p_transpose->n_pes,
local_ny_after_transpose,
local_y_start_after_transpose);
*total_local_size =
transpose_mpi_get_local_storage_size(p->p_transpose->nx,
p->p_transpose->ny,
p->p_transpose->my_pe,
p->p_transpose->n_pes);
*total_local_size *= p->p_fft->n_after[0];
*total_local_size *= 2; /* return size in fftw_real's */
if (p->p_fft->rank == 1 && p->p_fft->dir == FFTW_COMPLEX_TO_REAL) {
*local_ny_after_transpose *= 2;
*local_y_start_after_transpose *= 2;
}
}
}
/******************** Computing the Transform *******************/
static void first_dim_aux(rfftwnd_mpi_plan p,
int n_fields, fftw_real *local_data)
{
int local_ny = p->p_transpose->local_ny;
int nx = p->p_fft_x->n;
fftw_complex *work_1d = p->work ? p->work : p->p_fft->work;
n_fields *= p->p_fft->n_after[0]; /* dimensions after y
no longer need be considered
separately from n_fields */
if (n_fields > 1) {
fftw_plan p_fft_x = p->p_fft_x;
int fft_iter;
for (fft_iter = 0; fft_iter < local_ny; ++fft_iter)
fftw(p_fft_x, n_fields,
((fftw_complex *) local_data)
+ (nx * n_fields) * fft_iter, n_fields, 1,
work_1d, 1, 0);
}
else
fftw(p->p_fft_x, local_ny,
(fftw_complex *) local_data, 1, nx, work_1d, 1, 0);
}
static void other_dims_aux(rfftwnd_mpi_plan p,
int n_fields, fftw_real *local_data)
{
int local_nx = p->p_transpose->local_nx;
int n_after_x = p->p_fft->n[0] * p->p_fft->n_after[0];
if (n_fields > 1) {
rfftwnd_plan p_fft = p->p_fft;
int fft_iter;
if (p_fft->dir == FFTW_REAL_TO_COMPLEX)
for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
rfftwnd_real_to_complex(p_fft, n_fields,
local_data
+ (2 * n_after_x * n_fields) * fft_iter,
n_fields, 1,
NULL, 0, 0);
else
for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
rfftwnd_complex_to_real(p_fft, n_fields,
((fftw_complex *) local_data)
+ (n_after_x * n_fields) * fft_iter,
n_fields, 1,
NULL, 0, 0);
}
else {
if (p->p_fft->dir == FFTW_REAL_TO_COMPLEX)
rfftwnd_real_to_complex(p->p_fft, local_nx,
local_data, 1, 2*n_after_x,
NULL, 0, 0);
else
rfftwnd_complex_to_real(p->p_fft, local_nx,
(fftw_complex *) local_data,
1, n_after_x,
NULL, 0, 0);
}
}
void rfftwnd_mpi(rfftwnd_mpi_plan p,
int n_fields, fftw_real *local_data, fftw_real *work,
fftwnd_mpi_output_order output_order)
{
int el_size = (sizeof(fftw_complex) / sizeof(TRANSPOSE_EL_TYPE))
* n_fields * p->p_fft->n_after[0];
if (n_fields <= 0)
return;
if (p->p_fft->dir == FFTW_REAL_TO_COMPLEX) {
/* First, transform dimensions after the first, which are
local to this process: */
other_dims_aux(p, n_fields, local_data);
/* Second, transpose the first dimension with the second dimension
to bring the x dimension local to this process: */
transpose_mpi(p->p_transpose, el_size,
(TRANSPOSE_EL_TYPE *) local_data,
(TRANSPOSE_EL_TYPE *) work);
/* Third, transform the x dimension, which is now
local and contiguous: */
first_dim_aux(p, n_fields, local_data);
/* transpose back, if desired: */
if (output_order == FFTW_NORMAL_ORDER)
transpose_mpi(p->p_transpose_inv, el_size,
(TRANSPOSE_EL_TYPE *) local_data,
(TRANSPOSE_EL_TYPE *) work);
}
else { /* we have to do the steps in reverse order for c2r transform: */
/* NOTE: we assume that the same output_order is used for both
the forward and backward transforms: */
/* First, if necessary, transpose to get x dimension local: */
if (output_order == FFTW_NORMAL_ORDER)
transpose_mpi(p->p_transpose, el_size,
(TRANSPOSE_EL_TYPE *) local_data,
(TRANSPOSE_EL_TYPE *) work);
/* Second, transform the x dimension, which is now
local and contiguous: */
first_dim_aux(p, n_fields, local_data);
/* Third, transpose the first dimension with the second dimension
to bring the others dimensions local to this process: */
transpose_mpi(p->p_transpose_inv, el_size,
(TRANSPOSE_EL_TYPE *) local_data,
(TRANSPOSE_EL_TYPE *) work);
/* last, transform dimensions after the first, which are
local to this process: */
other_dims_aux(p, n_fields, local_data);
}
}
syntax highlighted by Code2HTML, v. 0.9.1