/*
DFT++ is a density functional package developed by the research group
of Professor Tomas Arias
Copyright 1996-2003 Sohrab Ismail-Beigi
This file is part of DFT++.
DFT++ is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DFT++ is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DFT++; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Please see the file CREDITS for a list of authors.
For academic users, we request that publications using results obtained with
this software reference
"New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).
and, if using the wavelet basis, further reference
"Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).
and
"Robust ab initio calculation of condensed matter: transparent convergence through
semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).
For your convenience, preprints of the above articles may be obtained from
http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/time.h>
#include "WL_header.h"
#define TIMELOOP 0
/* int Px,Py,Pz; */
/* Calculates the contribution to O from all levels above
current level.
3rd version of Oabove
self: the input and output grid of this calculation
workspace: additional workspace which is used inside
the convolutions
forwardfilter: actual filters to be used in the convolution,
different from constant filter because it
also includes numerical prefactor
*/
void Oabovesimple( struct Grid *self, double *workspace1,
double *workspace2,
double forwardfilter[],
double reversefilter[],
double fullfilter[])
{
int i,j;
if ( self->level > 0 )
/* Take I of SELF->parent onto SELF */
aconvP3d( self->parent->dat+
(self->org.x * self->parent->dim.y +
self->org.y) * self->parent->dim.z
+ self->org.z,
self->dat,
1, tr, tf, ts,
self->parent->dim.x,
self->parent->dim.y,
self->parent->dim.z,
self->dim.x, self->dim.y,self->dim.z,
HFL_T, workspace1, workspace2 );
/* Recursive call to Oabove for children. */
if (self->child)
Oabovesimple(self->child,workspace1,workspace2,
forwardfilter,reversefilter,fullfilter);
/* Recursive call to Oabove for siblings.*/
if (self->sibling)
Oabovesimple(self->sibling,workspace1,workspace2,
forwardfilter,reversefilter,fullfilter);
for (j=0;j<=HFL_O2;j++) {
/* ** IPD: what is i ???????? */
/* reversefilter[i*(0+HFL_O2)+j]= may be should be as: */
reversefilter[0*(1+HFL_O2)+j]=
(O2l_r[0*(1+HFL_O2)+j])*(pow(sqrt(2),self->level));
forwardfilter[0*(1+HFL_O2)+j]=
(O2l_f[0*(1+HFL_O2)+j])/(pow(sqrt(2),self->level));
fullfilter[0*(1+HFL_O2)+j]=
(O2l_s[0*(1+HFL_O2)+j])/(pow(2,self->level));
reversefilter[1*(1+HFL_O2)+j]=
(O2l_r[1*(1+HFL_O2)+j])*(pow(sqrt(2),self->level));
forwardfilter[1*(1+HFL_O2)+j]=
(O2l_f[1*(1+HFL_O2)+j])/(pow(sqrt(2),self->level));
fullfilter[1*(1+HFL_O2)+j]=
(O2l_s[1*(1+HFL_O2)+j])/(pow(2,self->level));
reversefilter[2*(1+HFL_O2)+j]=
(O2l_r[2*(1+HFL_O2)+j])*(pow(sqrt(2),self->level));
forwardfilter[2*(1+HFL_O2)+j]=
(O2l_f[2*(1+HFL_O2)+j])/(pow(sqrt(2),self->level));
fullfilter[2*(1+HFL_O2)+j]=
(O2l_s[2*(1+HFL_O2)+j])/(pow(2,self->level));
}
if (self->level == 0)
/*Remove all datapoints on self*/
for (i=0;i< ( (self->dim.x)*(self->dim.y)*(self->dim.z) );i++ )
self->dat[i]=0.;
if (self->level > 0)
/*O2level from self->parent to self*/
aconvP3d( self->parent->dat+
+(self->org.x * self->parent->dim.y +
self->org.y) * self->parent->dim.z
+ self->org.z,
self->dat,
0, reversefilter, forwardfilter, fullfilter,
self->parent->dim.x,
self->parent->dim.y,
self->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_O2, workspace1, workspace2 );
zeroghost(self);
}
/* Calculates the contribution to O from all levels above
current level.
in: the input grid of this calculation
workgrid: grid onto which we calculate I(in)
out: output grid
sign: flag if the calculation is cummulative
workspace: additional workspace which is used inside
the convolutions
forwardfilter: actual filters to be used in the convolution,
different from constant filter because it
also includes numerical prefactor
*/
void Oabovenew( struct Grid *out,
struct Grid *in,
int sign,
double forwardfilter[],
double reversefilter[],
double fullfilter[],
struct Grid *workgrid,
double *workspace1,
double *workspace2)
{
int i,j;
/* Return immediately if there is nothing to do */
/* No excutables BEFORE this statement, please! */
if (in==NULL) return;
/*Make sure output on toplevel is zero if we are not
doing a cumulative convolution*/
if ( (sign==0) && (in->level == 0) )
for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++)
out->dat[i]=0;
if ( ( in->level > 1 ) && (in->child) )
/* Take I from workgrid->parent onto workgrid. We also
add the values of in onto the target workgrid, to ensure
that we actually take I of in onto workgrid without
changing in itself*/
aconvP3dadd( workgrid->parent->dat+
(workgrid->org.x * in->parent->dim.y
+ workgrid->org.y)
* in->parent->dim.z
+ workgrid->org.z,
workgrid->dat,
in->dat,
tr, tf, ts,
workgrid->parent->dim.x,
workgrid->parent->dim.y,
workgrid->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_T, workspace1, workspace2 );
else if ( ( in->level == 1 ) && (in->child) ) {
/* Take I from in->parent onto workgrid */
aconvP3dadd( in->parent->dat+
(workgrid->org.x * in->parent->dim.y + workgrid->org.y)
* in->parent->dim.z
+ workgrid->org.z,
workgrid->dat,
in->dat,
tr, tf, ts,
in->parent->dim.x,
in->parent->dim.y,
in->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_T, workspace1, workspace2 );
}
/*Determine appropriate filter (including factors which take
into consideration the scale we are operating at) */
for (j=0;j<=HFL_O2;j++){
reversefilter[0*(1+HFL_O2)+j] = (O2l_r[0*(1+HFL_O2)+j])*in->scale.x;
forwardfilter[0*(1+HFL_O2)+j] = (O2l_f[0*(1+HFL_O2)+j]);
fullfilter[0*(1+HFL_O2)+j] = (O2l_s[0*(1+HFL_O2)+j])*in->scale.x;
reversefilter[1*(1+HFL_O2)+j] = (O2l_r[1*(1+HFL_O2)+j])*in->scale.y;
forwardfilter[1*(1+HFL_O2)+j] = (O2l_f[1*(1+HFL_O2)+j]);
fullfilter[1*(1+HFL_O2)+j] = (O2l_s[1*(1+HFL_O2)+j])*in->scale.y;
reversefilter[2*(1+HFL_O2)+j] = (O2l_r[2*(1+HFL_O2)+j])*in->scale.z;
forwardfilter[2*(1+HFL_O2)+j] = (O2l_f[2*(1+HFL_O2)+j]);
fullfilter[2*(1+HFL_O2)+j] = (O2l_s[2*(1+HFL_O2)+j])*in->scale.z;
}
if (workgrid->level > 1)
/*O2level from workgrid->parent to out*/
aconvP3d( workgrid->parent->dat+
+(workgrid->org.x * workgrid->parent->dim.y +
workgrid->org.y) * workgrid->parent->dim.z
+ workgrid->org.z,
out->dat,
sign, reversefilter, forwardfilter, fullfilter,
workgrid->parent->dim.x,
workgrid->parent->dim.y,
workgrid->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_O2, workspace1, workspace2 );
else if (workgrid->level == 1){
/*O2level directly from in->parent to out*/
aconvP3d( in->parent->dat+
+(out->org.x * in->parent->dim.y +
out->org.y) * in->parent->dim.z
+ out->org.z,
out->dat,
sign, reversefilter, forwardfilter, fullfilter,
in->parent->dim.x,
in->parent->dim.y,
in->parent->dim.z,
out->dim.x,
out->dim.y,
out->dim.z,
HFL_O2, workspace1, workspace2 );
}
/* Recursive call to Oabovenew for children. */
if (in->child)
Oabovenew(out->child,in->child,sign,
forwardfilter,reversefilter, fullfilter,
workgrid->child,workspace1,workspace2);
/* Recursive call to Oabovenew for siblings.*/
if (in->sibling)
Oabovenew(out->sibling,in->sibling,sign,
forwardfilter,reversefilter, fullfilter,
workgrid->sibling,workspace1, workspace2);
}
/*
self: input grid. Be very careful, we assume that
the input grid is zero on its ghostpoints.
workspace: memory block where some intermediate work
is done */
void Osame(struct Grid *self, double *workspace1, double *workspace2)
{
/*Take Osame on self. Need to make it possible to distinguish
between periodic boundary conditions and non periodic b.c.*/
conv3d(self->dat,O1l_r,O1l_f,O1l_s, /* added O1l_r and 01L_s */
self->dim.x,
self->dim.y,
self->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_O1);
/*Need 0 on ghostpoints*/
if (self->level > 0)
zeroghost(self);
if (self->sibling)
Osame(self->sibling,workspace1, workspace2);
if (self->child)
Osame(self->child,workspace1, workspace2);
}
/*Calculates the contribution to O from grids on the same level.
out: output grid
self: input grid, will not be changed
sign: tells whether calculation is cumulative or not
forwardfilter: workspace for filters in use
workspace: workspace for the convolutions */
void Osamenew(struct Grid *out, struct Grid *self, int sign,
double forwardfilter[],
double reversefilter[],
double fullfilter[],
double *workspace1,
double *workspace2)
{
int j;
/* Debugging check the input vector */
/* if(self->level==0){
printf("print input at beg. of Osameenew: %le\n",
self->dat[43583]);
printf("print output at beg. of Osamenew: %le\n",
out->dat[43583]);
}
*/
for (j=0;j<=HFL_O1;j++) {
reversefilter[0*(1+HFL_O1)+j] = (O1l_r[0*(1+HFL_O1)+j])*self->scale.x;
forwardfilter[0*(1+HFL_O1)+j] = (O1l_f[0*(1+HFL_O1)+j]);
fullfilter[0*(1+HFL_O1)+j] = (O1l_s[0*(1+HFL_O1)+j])*self->scale.x;
reversefilter[1*(1+HFL_O1)+j] = (O1l_r[1*(1+HFL_O1)+j])*self->scale.y;
forwardfilter[1*(1+HFL_O1)+j] = (O1l_f[1*(1+HFL_O1)+j]);
fullfilter[1*(1+HFL_O1)+j] = (O1l_s[1*(1+HFL_O1)+j])*self->scale.y;
reversefilter[2*(1+HFL_O1)+j] = (O1l_r[2*(1+HFL_O1)+j])*self->scale.z;
forwardfilter[2*(1+HFL_O1)+j] = (O1l_f[2*(1+HFL_O1)+j]);
fullfilter[2*(1+HFL_O1)+j] = (O1l_s[2*(1+HFL_O1)+j])*self->scale.z;
}
/*Take Osame on self. Need to make it possible to distinguish
between periodic boundary conditions and non periodic b.c.*/
/* IPD: now the grids know themselves of the boundaryconditions */
conv3dnew(self->dat, out->dat, sign, self->ifperiodic,
reversefilter,forwardfilter,fullfilter,
self->dim.x, self->dim.y, self->dim.z,
HFL_O1, workspace1, workspace2);
// if (self->parent) /*non periodic convolution */
// conv3dnew(self->dat,
// out->dat,
// sign,
// non_periodic,
// reversefilter,forwardfilter,fullfilter,
// self->dim.x,
// self->dim.y,
// self->dim.z,
// HFL_O1, workspace1, workspace2);
// else /*periodic convolution */
// conv3dnew(self->dat,
// out->dat,
// sign,
// do_periodic,
// reversefilter,forwardfilter,fullfilter,
// self->dim.x,
// self->dim.y,
// self->dim.z,
// HFL_O1, workspace1, workspace2);
/* Debugging */
/*
if(self->level==0){
printf("print input from Osamenew: %le\n", self->dat[43583]);
printf("print output from Osamenew: %le\n", out->dat[43583]);
}
*/
if (self->sibling)
Osamenew(out->sibling,self->sibling,sign,
forwardfilter,reversefilter, fullfilter,
workspace1, workspace2);
if (self->child)
Osamenew(out->child,self->child,sign,
forwardfilter,reversefilter, fullfilter,
workspace1, workspace2);
}
void Obelowsimple( struct Grid *self, double *workspace1,
double *workspace2,
double forwardfilter[],
double reversefilter[],
double fullfilter[])
{
int i,j,k;
for (j=0;j<=HFL_O2;j++) {
reversefilter[0*(1+HFL_O2)+j] = (O2l_r[0*(1+HFL_O2)+j])*self->scale.x;
forwardfilter[0*(1+HFL_O2)+j] = (O2l_f[0*(1+HFL_O2)+j]);
fullfilter[0*(1+HFL_O2)+j] = (O2l_s[0*(1+HFL_O2)+j])*self->scale.x;
reversefilter[1*(1+HFL_O2)+j] = (O2l_r[1*(1+HFL_O2)+j])*self->scale.y;
forwardfilter[1*(1+HFL_O2)+j] = (O2l_f[1*(1+HFL_O2)+j]);
fullfilter[1*(1+HFL_O2)+j] = (O2l_s[1*(1+HFL_O2)+j])*self->scale.y;
reversefilter[2*(1+HFL_O2)+j] = (O2l_r[2*(1+HFL_O2)+j])*self->scale.z;
forwardfilter[2*(1+HFL_O2)+j] = (O2l_f[2*(1+HFL_O2)+j]);
fullfilter[2*(1+HFL_O2)+j] = (O2l_s[2*(1+HFL_O2)+j])*self->scale.z;
}
/* O2lconvolution from self to self->parent */
if (self->level > 0)
aPconv3d( self->parent->dat+
( (self->org.x) * self->parent->dim.y
+ (self->org.y)) * self->parent->dim.z
+ (self->org.z),
self->dat,
1, reversefilter, forwardfilter, fullfilter,
self->parent->dim.x,
self->parent->dim.y,
self->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_O2, workspace1, workspace2 );
/*Remove all datapoints on self*/
for (i=0;i< ( (self->dim.x)*(self->dim.y)*(self->dim.z) );i++ )
self->dat[i]=0;
/*Same process on siblings*/
if (self->sibling)
Obelowsimple(self->sibling, workspace1, workspace2,
forwardfilter, reversefilter, fullfilter);
/*Same process on children*/
if (self->child)
Obelowsimple(self->child,workspace1, workspace2,
forwardfilter,reversefilter,fullfilter);
/*This is a time consuming hack. Need to replace it
with modified version of Idag very soon, but
for now we keep it to see if we can get the numbers
to work out correctly.*/
if (self->level>0)
for (i=0; i<self->dim.x; i+=2)
for (j=0; j<self->dim.y; j+=2)
for (k=0; k<self->dim.z; k+=2)
*( self->parent->dat +
( (self->org.x+i/2) * self->parent->dim.y +
(self->org.y+j/2) ) * self->parent->dim.z +
(self->org.z+k/2) )
+=
*( self->dat+(i*self->dim.y+j)*self->dim.z+k );
/* Idag convolution from self to self->parent
This part of the function ensures that we
get the contribution to O from ALL levels
below, not just the level right below
Need to make condition a little more rigorous than
this*/
if ( (self->parent) && (self->child) )
aPconv3d( self->parent->dat+
( (self->org.x) * self->parent->dim.y
+ (self->org.y)) * self->parent->dim.z
+ (self->org.z),
self->dat,
1, tr, tf, ts,
self->parent->dim.x,
self->parent->dim.y,
self->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_T, workspace1, workspace2 );
}
/*Calculates the contribution to O from grids on all levels below.
out: output grid
self: input grid, will not be changed
sign: tells whether calculation is cumulative or not
forwardfilter: workspace for filters in use
workspace: workspace for the convolutions */
void Obelownew( struct Grid *out,
struct Grid *self,
int sign,
double forwardfilter[],
double reversefilter[],
double fullfilter[],
double *workspace1,
double *workspace2)
{
int i,j,k;
/* Debugging check the input vector */
/*
if(self->level==0){
printf("print input at beg. of Obelownew: %le\n",
self->dat[43583]);
printf("print output at beg. of Obelownew: %le\n",
out->dat[43583]);
}
*/
/*Same process on siblings*/
if (self->sibling)
Obelownew(out->sibling,self->sibling,sign,
forwardfilter,reversefilter,fullfilter,
workspace1,workspace2);
/*Same process on children*/
if (self->child)
Obelownew(out->child,self->child,sign,
forwardfilter,reversefilter,fullfilter,
workspace1,workspace2);
/*Determine overlap coefficients, including scaling factors*/
for (j=0;j<=HFL_O2;j++) {
reversefilter[0*(1+HFL_O2)+j] = (O2l_r[0*(1+HFL_O2)+j])*self->scale.x;
forwardfilter[0*(1+HFL_O2)+j] = (O2l_f[0*(1+HFL_O2)+j]);
fullfilter[0*(1+HFL_O2)+j] = (O2l_s[0*(1+HFL_O2)+j])*self->scale.x;
reversefilter[1*(1+HFL_O2)+j] = (O2l_r[1*(1+HFL_O2)+j])*self->scale.y;
forwardfilter[1*(1+HFL_O2)+j] = (O2l_f[1*(1+HFL_O2)+j]);
fullfilter[1*(1+HFL_O2)+j] = (O2l_s[1*(1+HFL_O2)+j])*self->scale.y;
reversefilter[2*(1+HFL_O2)+j] = (O2l_r[2*(1+HFL_O2)+j])*self->scale.z;
forwardfilter[2*(1+HFL_O2)+j] = (O2l_f[2*(1+HFL_O2)+j]);
fullfilter[2*(1+HFL_O2)+j] = (O2l_s[2*(1+HFL_O2)+j])*self->scale.z;
}
if ( (sign == 0) && (self->child==NULL) )
/*Have to zero out every gridpoint on bottom level,
will not be necessary in final version of Obelownew, need
to change that eventually*/
for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++)
out->dat[i]=0;
if ( (self->level > 0) ) {
if (sign == 0)
if (self->sibling == NULL)
/*Have to zero out every gridpoint on out->parent*/
for (i=0;
i<(out->parent->dim.x*out->parent->dim.y*out->parent->dim.z);
i++)
out->parent->dat[i]=0;
/* O2lconvolution from self to out->parent */
aPconv3d( out->parent->dat+
( (self->org.x) * out->parent->dim.y
+ (self->org.y)) * out->parent->dim.z
+ (self->org.z),
self->dat,
1, reversefilter, forwardfilter, fullfilter,
out->parent->dim.x,
out->parent->dim.y,
out->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_O2, workspace1, workspace2 );
/*This is a hack which could be improved by doing the addition inside
of the aPconv3d function call below, but it only costs about 2% in
performance, so I will not modify the function to make
that possible for now..*/
for (i=0; i<out->dim.x; i+=2)
for (j=0; j<out->dim.y; j+=2)
for (k=0; k<out->dim.z; k+=2)
*( out->parent->dat +
( (out->org.x+i/2) * out->parent->dim.y +
(out->org.y+j/2) ) * out->parent->dim.z +
(out->org.z+k/2) )
+=
*( out->dat+(i*out->dim.y+j)*out->dim.z+k );
/* Idag convolution from out to out->parent
This part of the function ensures that we
get the contribution to O from ALL levels
below, not just the level right below
Need to make condition a little more rigorous than
this*/
if (out->child)
aPconv3d( out->parent->dat+
( (out->org.x) * out->parent->dim.y
+ (out->org.y)) * out->parent->dim.z
+ (out->org.z),
out->dat,
1, tr, tf, ts,
out->parent->dim.x,
out->parent->dim.y,
out->parent->dim.z,
out->dim.x,
out->dim.y,
out->dim.z,
HFL_T, workspace1, workspace2 );
}
}
/*Multilevel overlap function, input - output
in: the inputgridstructure, Is assumed to be
zero on it ghostpoints. If is not, then
run it once through zeroghostrec to
ensure consistency
workgrid: a grid onto which we place the results of
some intermediate calculations
workspace: additional workspace which is used inside
the convolutions */
void O( struct Grid *out, struct Grid *in,
double O1lf[], double O1lr[], double O1lfull[],
double O2lf[], double O2lr[], double O2lfull[],
struct Grid *workgrid, double *workspace1, double *workspace2)
{
/* Use a buffer grid, so that O(in, in) would work properly */
/* struct Grid *Tgrid; */
/*Uncomment the line below if not zero on ghostpoints*/
/* zeroghostrec(in); */
if ( (HFL_T>HFL_O2) || (HFL_O1>HFL_O2) || (HFL_T>HFL_O1) ) {
printf("Warning: our code assumes HFL_O2 > HFL_O1 > HFL_T\n");
printf("while you have entered the lengths %d %d %d\n\n",
HFL_O2,HFL_O1,HFL_T);
printf("This creates problems with the size of workspace\n");
printf("so you must change the parameters determining the workspace.\n");
exit(1);
}
/*
#ifdef newgridtype
Tgrid=mkgridfromgridnew(out,NULL);
#else
Tgrid=mkgridfromgrid(out,NULL);
#endif
*/
/*Find contributions to O from grid at all levels below*/
Obelownew(out,in,0,O2lf,O2lr,O2lfull,workspace1,workspace2);
/* Obelownew(Tgrid,in,0,O2lf,O2lr,O2lfull,workspace1,workspace2); */
/*Find contributions to O from levels above*/
Oabovenew(out,in,1,O2lf,O2lr,O2lfull,workgrid,workspace1,workspace2);
/* Oabovenew(Tgrid,in,1,O2lf,O2lr,O2lfull,workgrid,workspace1,workspace2); */
/*Find contributions to O from same level*/
Osamenew(out,in,1,O1lf,O1lr,O2lfull,workspace1,workspace2);
/* Osamenew(Tgrid,in,1,O1lf,O1lr,O2lfull,workspace1,workspace2); */
/*
#ifdef newgridtype
copygridnew(out, Tgrid);
killgridnew(Tgrid);
#else
copygrid(out, Tgrid);
killgrid(Tgrid);
#endif
*/
/*Make sure the function returns a grid with zeros on ghostpoints*/
zeroghostrec(out);
/* what if we try the simple versions?? - does it inplace - not too bad;
Obelowsimple(in,workspace1,workspace2,0,O2lf,O2lr,O2lfull);
Oabovesimple(in,workspace1,workspace2,O2lf,O2lr,O2lfull);
Osame(in, workspace1, workspace2) */
}
/****************************************************************/
/* Routines for L */
/* Calculates the contribution to L from all levels above
current level.
in: the input grid of this calculation
workgrid: grid onto which we calculate I(in)
out: output grid
workspace: additional workspace which is used inside
the convolutions
forwardfilter: workspace for actual filters to be used
in the convolution,
different from constant filter because it
also includes numerical prefactor
*/
void Labovenew( struct Grid *out,
struct Grid *in,
int sign,
double forwardfilter[],
double reversefilter[],
double fullfilter[],
struct Grid *workgrid,
double *workspace1,
double *workspace2)
{
int i,j;
/* Return immediately if there is nothing to do */
/* No excutables BEFORE this statement, please! */
if (in==NULL) return;
/*Make sure output on toplevel is zero if we are not
doing a cumulative convolution*/
if ( (sign==0) && (in->level == 0) )
for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++)
out->dat[i]=0;
if ( ( in->level > 1 ) && (in->child) )
/* Take I from workgrid->parent onto workgrid. We also
add the values of in onto the target workgrid, to ensure
that we actually take I of in onto workgrid without
changing in itself*/
aconvP3dadd( workgrid->parent->dat+
(workgrid->org.x * in->parent->dim.y
+ workgrid->org.y)
* in->parent->dim.z
+ workgrid->org.z,
workgrid->dat,
in->dat,
tr, tf, ts,
workgrid->parent->dim.x,
workgrid->parent->dim.y,
workgrid->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_T, workspace1,workspace2 );
else if ( ( in->level == 1 ) && (in->child) )
/* Take I from in->parent onto workgrid */
aconvP3dadd( in->parent->dat+
(workgrid->org.x * in->parent->dim.y
+ workgrid->org.y)
* in->parent->dim.z
+ workgrid->org.z,
workgrid->dat,
in->dat,
tr, tf, ts,
in->parent->dim.x,
in->parent->dim.y,
in->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_T, workspace1,workspace2 );
/*Determine appropriate filter (including factors
which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L2;j++) {
reversefilter[0*(1+HFL_L2)+j]=(L2l_r[j])/in->scale.x;
forwardfilter[0*(1+HFL_L2)+j]=(L2l_f[j]);
fullfilter[0*(1+HFL_L2)+j]=(L2l_s[j])/in->scale.x;
reversefilter[1*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.y;
forwardfilter[1*(1+HFL_L2)+j]=(O2l_f[j]);
fullfilter[1*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.y;
reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.z;
forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j]);
fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.z;
/* reversefilter[0*(1+HFL_L2)+j]=(L2l_r[j])*(pow(sqrt(2),in->level)); */
/* forwardfilter[0*(1+HFL_L2)+j]=(L2l_f[j])*(pow(sqrt(2),in->level)); */
/* fullfilter[0*(1+HFL_L2)+j]=(L2l_s[j])*(pow(2,in->level)); */
/* reversefilter[1*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */
/* forwardfilter[1*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */
/* fullfilter[1*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */
/* reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */
/* forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */
/* fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */
}
if (workgrid->level > 1)
/*L2level in x-direction from workgrid->parent to out*/
aconvP3d( workgrid->parent->dat+
+(workgrid->org.x * workgrid->parent->dim.y +
workgrid->org.y) * workgrid->parent->dim.z
+ workgrid->org.z,
out->dat,
sign, reversefilter, forwardfilter, fullfilter,
workgrid->parent->dim.x,
workgrid->parent->dim.y,
workgrid->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_L2, workspace1,workspace2 );
else if (workgrid->level == 1)
/*L2level in x-direction directly from in->parent to out*/
aconvP3d( in->parent->dat+
+(out->org.x * in->parent->dim.y +
out->org.y) * in->parent->dim.z
+ out->org.z,
out->dat,
sign, reversefilter, forwardfilter, fullfilter,
in->parent->dim.x,
in->parent->dim.y,
in->parent->dim.z,
out->dim.x,
out->dim.y,
out->dim.z,
HFL_L2, workspace1, workspace2 );
/*Determine appropriate filter (including factors
which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L2;j++) {
reversefilter[0*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.x;
forwardfilter[0*(1+HFL_L2)+j]=(O2l_f[j]);
fullfilter[0*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.x;
reversefilter[1*(1+HFL_L2)+j]=(L2l_r[j])/in->scale.y;
forwardfilter[1*(1+HFL_L2)+j]=(L2l_f[j]);
fullfilter[1*(1+HFL_L2)+j]=(L2l_s[j])/in->scale.y;
reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.z;
forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j]);
fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.z;
/* reversefilter[0*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */
/* forwardfilter[0*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */
/* fullfilter[0*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */
/* reversefilter[1*(1+HFL_L2)+j]=(L2l_r[j])*(pow(sqrt(2),in->level)); */
/* forwardfilter[1*(1+HFL_L2)+j]=(L2l_f[j])*(pow(sqrt(2),in->level)); */
/* fullfilter[1*(1+HFL_L2)+j]=(L2l_s[j])*(pow(2,in->level)); */
/* reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */
/* forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */
/* fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */
}
if (workgrid->level > 1)
/*L2level in y-direction from workgrid->parent to out*/
aconvP3d( workgrid->parent->dat+
+(workgrid->org.x * workgrid->parent->dim.y +
workgrid->org.y) * workgrid->parent->dim.z
+ workgrid->org.z,
out->dat,
1, reversefilter, forwardfilter, fullfilter,
workgrid->parent->dim.x,
workgrid->parent->dim.y,
workgrid->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_L2, workspace1,workspace2 );
else if (workgrid->level == 1)
/*L2level in y-direction directly from in->parent to out*/
aconvP3d( in->parent->dat+
+(out->org.x * in->parent->dim.y +
out->org.y) * in->parent->dim.z
+ out->org.z,
out->dat,
1, reversefilter, forwardfilter, fullfilter,
in->parent->dim.x,
in->parent->dim.y,
in->parent->dim.z,
out->dim.x,
out->dim.y,
out->dim.z,
HFL_L2, workspace1, workspace2 );
/*Determine appropriate filter (including factors
which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L2;j++) {
reversefilter[0*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.x;
forwardfilter[0*(1+HFL_L2)+j]=(O2l_f[j]);
fullfilter[0*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.x;
reversefilter[1*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.y;
forwardfilter[1*(1+HFL_L2)+j]=(O2l_f[j]);
fullfilter[1*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.y;
reversefilter[2*(1+HFL_L2)+j]=(L2l_r[j])/in->scale.z;
forwardfilter[2*(1+HFL_L2)+j]=(L2l_f[j]);
fullfilter[2*(1+HFL_L2)+j]=(L2l_s[j])/in->scale.z;
}
if (workgrid->level > 1)
/*L2level in z-direction from workgrid->parent to out*/
aconvP3d( workgrid->parent->dat+
+(workgrid->org.x * workgrid->parent->dim.y +
workgrid->org.y) * workgrid->parent->dim.z
+ workgrid->org.z,
out->dat,
1, reversefilter, forwardfilter, fullfilter,
workgrid->parent->dim.x,
workgrid->parent->dim.y,
workgrid->parent->dim.z,
workgrid->dim.x,
workgrid->dim.y,
workgrid->dim.z,
HFL_L2, workspace1, workspace2 );
else if (workgrid->level == 1)
/*L2level in z-direction directly from in->parent to out*/
aconvP3d( in->parent->dat+
+(out->org.x * in->parent->dim.y +
out->org.y) * in->parent->dim.z
+ out->org.z,
out->dat,
1, reversefilter, forwardfilter, fullfilter,
in->parent->dim.x,
in->parent->dim.y,
in->parent->dim.z,
out->dim.x,
out->dim.y,
out->dim.z,
HFL_L2, workspace1, workspace2 );
/* Recursive call to Labovenew for children. */
if (in->child)
Labovenew(out->child,in->child,sign,
forwardfilter, reversefilter, fullfilter,
workgrid->child, workspace1, workspace2 );
/* Recursive call to Labovenew for siblings.*/
if (in->sibling)
Labovenew(out->sibling,in->sibling,sign,
forwardfilter,reversefilter, fullfilter,
workgrid->sibling,workspace1, workspace2 );
}
/*Calculates the contribution to L from grids on the same level.
out: output grid
self: input grid, will not be changed
sign: tells whether calculation is cumulative or not
forwardfilter: workspace for filters in use
workspace: workspace for the convolutions */
void Lsamenew(struct Grid *out, struct Grid *self, int sign,
double forwardfilter[],
double reversefilter[],
double fullfilter[],
double *workspace1,
double *workspace2)
{
int j;
/*Determine appropriate filter for convolution in x-direction
(including factors which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L1;j++) {
reversefilter[0*(1+HFL_L1)+j]=(L1l_r[j])/self->scale.x;
forwardfilter[0*(1+HFL_L1)+j]=(L1l_f[j]);
fullfilter[0*(1+HFL_L1)+j]=(L1l_s[j])/self->scale.x;;
reversefilter[1*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.y;
forwardfilter[1*(1+HFL_L1)+j]=(O1l_f[j]);
fullfilter[1*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.y;
reversefilter[2*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.z;
forwardfilter[2*(1+HFL_L1)+j]=(O1l_f[j]);
fullfilter[2*(1+HFL_L1)+j]=(O1l_f[j])*self->scale.z;
}
/*Take Lsame on self. Need to make it possible to distinguish
between periodic boundary conditions and non periodic b.c.*/
/* IPD: Now the grids know about the boundary conditions */
/* Do convolution, dy dz / dx */
conv3dnew(self->dat, out->dat, sign, self->ifperiodic,
reversefilter,forwardfilter, fullfilter,
self->dim.x, self->dim.y, self->dim.z,
HFL_L1, workspace1, workspace2);
/*Determine appropriate filter for convolution in y-direction
(including factors which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L1;j++) {
reversefilter[0*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.x;
forwardfilter[0*(1+HFL_L1)+j]=(O1l_f[j]);
fullfilter[0*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.x;
reversefilter[1*(1+HFL_L1)+j]=(L1l_r[j])/self->scale.y;
forwardfilter[1*(1+HFL_L1)+j]=(L1l_f[j]);
fullfilter[1*(1+HFL_L1)+j]=(L1l_s[j])/self->scale.y;
reversefilter[2*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.z;
forwardfilter[2*(1+HFL_L1)+j]=(O1l_f[j]);
fullfilter[2*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.z;
}
/* Do convolution,dx dz / dy */
conv3dnew(self->dat, out->dat, 1, self->ifperiodic,
reversefilter,forwardfilter, fullfilter,
self->dim.x, self->dim.y, self->dim.z,
HFL_L1, workspace1, workspace2);
/*Determine appropriate filter for convolution in z-direction
(including factors which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L1;j++) {
reversefilter[0*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.x;
forwardfilter[0*(1+HFL_L1)+j]=(O1l_f[j]);
fullfilter[0*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.x;
reversefilter[1*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.y;
forwardfilter[1*(1+HFL_L1)+j]=(O1l_f[j]);
fullfilter[1*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.y;
reversefilter[2*(1+HFL_L1)+j]=(L1l_r[j])/self->scale.z;
forwardfilter[2*(1+HFL_L1)+j]=(L1l_f[j]);
fullfilter[2*(1+HFL_L1)+j]=(L1l_s[j])/self->scale.z;
}
/* Do convolution,dx dy /dz */
conv3dnew(self->dat, out->dat, 1, self->ifperiodic,
reversefilter,forwardfilter, fullfilter,
self->dim.x, self->dim.y, self->dim.z,
HFL_L1, workspace1, workspace2);
if (self->sibling)
Lsamenew(out->sibling,self->sibling,sign,
forwardfilter,reversefilter,fullfilter,
workspace1, workspace2);
if (self->child)
Lsamenew(out->child,self->child,sign,
forwardfilter,reversefilter, fullfilter,
workspace1, workspace2);
}
/*Calculates the contribution to L from grids on all levels below.
out: output grid
self: input grid, will not be changed
sign: tells whether calculation is cumulative or not
forwardfilter: workspace for filters in use
workspace: workspace for the convolutions */
void Lbelownew( struct Grid *out, struct Grid *self, int sign,
double forwardfilter[],
double reversefilter[],
double fullfilter[],
double *workspace1, double *workspace2)
{
int i,j,k;
/*Same process on siblings*/
if (self->sibling)
Lbelownew(out->sibling,self->sibling,sign,
forwardfilter,reversefilter,fullfilter,
workspace1,workspace2);
/*Same process on children*/
if (self->child)
Lbelownew(out->child,self->child,sign,
forwardfilter,reversefilter,fullfilter,
workspace1,workspace2);
/*Have to zero out every gridpoint on bottom level,
will not be necessary in final version of Obelow, need
to change that eventually*/
if ( (sign == 0) && (self->child==NULL) )
for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++)
out->dat[i]=0;
if ( (self->level > 0) ) {
/*First do L in x-direction */
/*Determine appropriate filter (including factors
which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L2;j++) {
reversefilter[0*(1+HFL_L2)+j] = (L2l_r[j])/self->scale.x;
forwardfilter[0*(1+HFL_L2)+j] = (L2l_f[j]);
fullfilter[0*(1+HFL_L2)+j] = (L2l_s[j])/self->scale.x;
reversefilter[1*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.y;
forwardfilter[1*(1+HFL_L2)+j] = (O2l_f[j]);
fullfilter[1*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.y;
reversefilter[2*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.z;
forwardfilter[2*(1+HFL_L2)+j] = (O2l_f[j]);
fullfilter[2*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.z;
}
if (sign == 0)
if (self->sibling == NULL)
/*Have to zero out every gridpoint on out->parent*/
for (i=0;
i<(out->parent->dim.x*out->parent->dim.y*out->parent->dim.z);
i++)
out->parent->dat[i]=0;
/* L2lconvolution from self to out->parent */
aPconv3d( out->parent->dat+
( (self->org.x) * out->parent->dim.y
+ (self->org.y)) * out->parent->dim.z
+ (self->org.z),
self->dat,
1, reversefilter, forwardfilter, fullfilter,
out->parent->dim.x,
out->parent->dim.y,
out->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_L2, workspace1, workspace2 );
/*Next do L in y-direction */
/*Determine appropriate filter (including factors
which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L2;j++) {
reversefilter[0*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.x;
forwardfilter[0*(1+HFL_L2)+j] = (O2l_f[j]);
fullfilter[0*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.x;
reversefilter[1*(1+HFL_L2)+j] = (L2l_r[j])/self->scale.y;
forwardfilter[1*(1+HFL_L2)+j] = (L2l_f[j]);
fullfilter[1*(1+HFL_L2)+j] = (L2l_s[j])/self->scale.y;
reversefilter[2*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.z;
forwardfilter[2*(1+HFL_L2)+j] = (O2l_f[j]);
fullfilter[2*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.z;
}
/* L2lconvolution from self to out->parent */
aPconv3d( out->parent->dat+
( (self->org.x) * out->parent->dim.y
+ (self->org.y)) * out->parent->dim.z
+ (self->org.z),
self->dat,
1, reversefilter, forwardfilter, fullfilter,
out->parent->dim.x,
out->parent->dim.y,
out->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_L2, workspace1, workspace2 );
/*Finally do L in z-direction */
/*Determine appropriate filter (including factors
which take into consideration the scale we are
operating at) */
for (j=0;j<=HFL_L2;j++) {
reversefilter[0*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.x;
forwardfilter[0*(1+HFL_L2)+j] = (O2l_f[j]);
fullfilter[0*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.x;
reversefilter[1*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.y;
forwardfilter[1*(1+HFL_L2)+j] = (O2l_f[j]);
fullfilter[1*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.y;
reversefilter[2*(1+HFL_L2)+j] = (L2l_r[j])/self->scale.z;
forwardfilter[2*(1+HFL_L2)+j] = (L2l_f[j]);
fullfilter[2*(1+HFL_L2)+j] = (L2l_s[j])/self->scale.z;
}
/* L2lconvolution from self to out->parent */
aPconv3d( out->parent->dat+
( (self->org.x) * out->parent->dim.y
+ (self->org.y)) * out->parent->dim.z
+ (self->org.z),
self->dat,
1, reversefilter, forwardfilter, fullfilter,
out->parent->dim.x,
out->parent->dim.y,
out->parent->dim.z,
self->dim.x,
self->dim.y,
self->dim.z,
HFL_L2, workspace1, workspace2 );
/*This is a hack which could be improved by doing the addition inside
of the aPconv3d function call below, but it only costs about 2% in
performance, so I will not modify the function to make
that possible for now..*/
for (i=0; i<out->dim.x; i+=2)
for (j=0; j<out->dim.y; j+=2)
for (k=0; k<out->dim.z; k+=2)
*( out->parent->dat +
( (out->org.x+i/2) * out->parent->dim.y +
(out->org.y+j/2) ) * out->parent->dim.z +
(out->org.z+k/2) )
+=
*( out->dat+(i*out->dim.y+j)*out->dim.z+k );
/* Idag convolution from out to out->parent
This part of the function ensures that we
get the contribution to O from ALL levels
below, not just the level right below
Need to make condition a little more rigorous than
this*/
if (out->child)
aPconv3d( out->parent->dat+
( (out->org.x) * out->parent->dim.y
+ (out->org.y)) * out->parent->dim.z
+ (out->org.z),
out->dat,
1, tr, tf, ts,
out->parent->dim.x,
out->parent->dim.y,
out->parent->dim.z,
out->dim.x,
out->dim.y,
out->dim.z,
HFL_T, workspace1, workspace2 );
}
}
/*Multilevel Laplacian function (in place, recursive)
input: the input and output grid of this calculation
workgrid: a grid onto which we place the results of
some intermediate calculations
workspace: additional workspace which is used inside
the convolutions */
void
L( struct Grid *out, struct Grid *in,
double L1lf[], double L1lr[], double L1lfull[],
double L2lf[], double L2lr[], double L2lfull[],
struct Grid *workgrid,double *workspace1, double *workspace2)
{
/* use a buffer grid, so that L(in, in) works properly as well */
/* struct Grid *Tgrid; */
if ( (HFL_T>HFL_O2) || (HFL_O1>HFL_O2) || (HFL_T>HFL_O1) ) {
printf("Warning: our code assumes HFL_O2 > HFL_O1 > HFL_T\n");
printf("while you have entered the lengths %d %d %d\n\n",
HFL_O2,HFL_O1,HFL_T);
printf("This creates problems with the size of workspace\n");
printf("so you must change the parameters determining the workspace.\n");
exit(1);
}
/* We'll have to figure out where to put this, because it can't stay
global. */
/* no_of_times_L_called+=1; */
/*
#ifdef newgridtype
Tgrid=mkgridfromgridnew(out, NULL);
#else
Tgrid=mkgridfromgrid(out, NULL);
#endif
*/
/*Find contributions to L from grid at all levels below*/
Lbelownew(out,in,0,L2lf,L2lr,L2lfull,workspace1,workspace2);
/* Lbelownew(Tgrid,in,0,L2lf,L2lr,L2lfull,workspace1,workspace2); */
/*Find contributions to L from levels above*/
Labovenew(out,in,1,L2lf,L2lr,L2lfull,workgrid,workspace1,workspace2);
/* Labovenew(Tgrid,in,1,L2lf,L2lr,L2lfull,workgrid,workspace1,workspace2); */
/*Find contributions to L from same level*/
Lsamenew(out,in,1,L1lf,L1lr,L1lfull,workspace1,workspace2);
/* Lsamenew(Tgrid,in,1,L1lf,L1lr,L1lfull,workspace1,workspace2); */
/*
#ifdef newgridtype
copygridnew(out, Tgrid);
killgridnew(Tgrid);
#else
copygrid(out, Tgrid);
killgrid(Tgrid);
#endif
*/
zeroghostrec(out);
}
syntax highlighted by Code2HTML, v. 0.9.1