/* DFT++ is a density functional package developed by the research group of Professor Tomas Arias Copyright 1996-2003 Sohrab Ismail-Beigi This file is part of DFT++. DFT++ is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. DFT++ is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DFT++; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please see the file CREDITS for a list of authors. For academic users, we request that publications using results obtained with this software reference "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000). and, if using the wavelet basis, further reference "Multiresolution analysis of electronic structure: semicardinal and wavelet bases," T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999). and "Robust ab initio calculation of condensed matter: transparent convergence through semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003). For your convenience, preprints of the above articles may be obtained from http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively. */ #include #include #include #include #include "WL_header.h" #define TIMELOOP 0 /* int Px,Py,Pz; */ /* Calculates the contribution to O from all levels above current level. 3rd version of Oabove self: the input and output grid of this calculation workspace: additional workspace which is used inside the convolutions forwardfilter: actual filters to be used in the convolution, different from constant filter because it also includes numerical prefactor */ void Oabovesimple( struct Grid *self, double *workspace1, double *workspace2, double forwardfilter[], double reversefilter[], double fullfilter[]) { int i,j; if ( self->level > 0 ) /* Take I of SELF->parent onto SELF */ aconvP3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y,self->dim.z, HFL_T, workspace1, workspace2 ); /* Recursive call to Oabove for children. */ if (self->child) Oabovesimple(self->child,workspace1,workspace2, forwardfilter,reversefilter,fullfilter); /* Recursive call to Oabove for siblings.*/ if (self->sibling) Oabovesimple(self->sibling,workspace1,workspace2, forwardfilter,reversefilter,fullfilter); for (j=0;j<=HFL_O2;j++) { /* ** IPD: what is i ???????? */ /* reversefilter[i*(0+HFL_O2)+j]= may be should be as: */ reversefilter[0*(1+HFL_O2)+j]= (O2l_r[0*(1+HFL_O2)+j])*(pow(sqrt(2),self->level)); forwardfilter[0*(1+HFL_O2)+j]= (O2l_f[0*(1+HFL_O2)+j])/(pow(sqrt(2),self->level)); fullfilter[0*(1+HFL_O2)+j]= (O2l_s[0*(1+HFL_O2)+j])/(pow(2,self->level)); reversefilter[1*(1+HFL_O2)+j]= (O2l_r[1*(1+HFL_O2)+j])*(pow(sqrt(2),self->level)); forwardfilter[1*(1+HFL_O2)+j]= (O2l_f[1*(1+HFL_O2)+j])/(pow(sqrt(2),self->level)); fullfilter[1*(1+HFL_O2)+j]= (O2l_s[1*(1+HFL_O2)+j])/(pow(2,self->level)); reversefilter[2*(1+HFL_O2)+j]= (O2l_r[2*(1+HFL_O2)+j])*(pow(sqrt(2),self->level)); forwardfilter[2*(1+HFL_O2)+j]= (O2l_f[2*(1+HFL_O2)+j])/(pow(sqrt(2),self->level)); fullfilter[2*(1+HFL_O2)+j]= (O2l_s[2*(1+HFL_O2)+j])/(pow(2,self->level)); } if (self->level == 0) /*Remove all datapoints on self*/ for (i=0;i< ( (self->dim.x)*(self->dim.y)*(self->dim.z) );i++ ) self->dat[i]=0.; if (self->level > 0) /*O2level from self->parent to self*/ aconvP3d( self->parent->dat+ +(self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 0, reversefilter, forwardfilter, fullfilter, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_O2, workspace1, workspace2 ); zeroghost(self); } /* Calculates the contribution to O from all levels above current level. in: the input grid of this calculation workgrid: grid onto which we calculate I(in) out: output grid sign: flag if the calculation is cummulative workspace: additional workspace which is used inside the convolutions forwardfilter: actual filters to be used in the convolution, different from constant filter because it also includes numerical prefactor */ void Oabovenew( struct Grid *out, struct Grid *in, int sign, double forwardfilter[], double reversefilter[], double fullfilter[], struct Grid *workgrid, double *workspace1, double *workspace2) { int i,j; /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (in==NULL) return; /*Make sure output on toplevel is zero if we are not doing a cumulative convolution*/ if ( (sign==0) && (in->level == 0) ) for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++) out->dat[i]=0; if ( ( in->level > 1 ) && (in->child) ) /* Take I from workgrid->parent onto workgrid. We also add the values of in onto the target workgrid, to ensure that we actually take I of in onto workgrid without changing in itself*/ aconvP3dadd( workgrid->parent->dat+ (workgrid->org.x * in->parent->dim.y + workgrid->org.y) * in->parent->dim.z + workgrid->org.z, workgrid->dat, in->dat, tr, tf, ts, workgrid->parent->dim.x, workgrid->parent->dim.y, workgrid->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_T, workspace1, workspace2 ); else if ( ( in->level == 1 ) && (in->child) ) { /* Take I from in->parent onto workgrid */ aconvP3dadd( in->parent->dat+ (workgrid->org.x * in->parent->dim.y + workgrid->org.y) * in->parent->dim.z + workgrid->org.z, workgrid->dat, in->dat, tr, tf, ts, in->parent->dim.x, in->parent->dim.y, in->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_T, workspace1, workspace2 ); } /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_O2;j++){ reversefilter[0*(1+HFL_O2)+j] = (O2l_r[0*(1+HFL_O2)+j])*in->scale.x; forwardfilter[0*(1+HFL_O2)+j] = (O2l_f[0*(1+HFL_O2)+j]); fullfilter[0*(1+HFL_O2)+j] = (O2l_s[0*(1+HFL_O2)+j])*in->scale.x; reversefilter[1*(1+HFL_O2)+j] = (O2l_r[1*(1+HFL_O2)+j])*in->scale.y; forwardfilter[1*(1+HFL_O2)+j] = (O2l_f[1*(1+HFL_O2)+j]); fullfilter[1*(1+HFL_O2)+j] = (O2l_s[1*(1+HFL_O2)+j])*in->scale.y; reversefilter[2*(1+HFL_O2)+j] = (O2l_r[2*(1+HFL_O2)+j])*in->scale.z; forwardfilter[2*(1+HFL_O2)+j] = (O2l_f[2*(1+HFL_O2)+j]); fullfilter[2*(1+HFL_O2)+j] = (O2l_s[2*(1+HFL_O2)+j])*in->scale.z; } if (workgrid->level > 1) /*O2level from workgrid->parent to out*/ aconvP3d( workgrid->parent->dat+ +(workgrid->org.x * workgrid->parent->dim.y + workgrid->org.y) * workgrid->parent->dim.z + workgrid->org.z, out->dat, sign, reversefilter, forwardfilter, fullfilter, workgrid->parent->dim.x, workgrid->parent->dim.y, workgrid->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_O2, workspace1, workspace2 ); else if (workgrid->level == 1){ /*O2level directly from in->parent to out*/ aconvP3d( in->parent->dat+ +(out->org.x * in->parent->dim.y + out->org.y) * in->parent->dim.z + out->org.z, out->dat, sign, reversefilter, forwardfilter, fullfilter, in->parent->dim.x, in->parent->dim.y, in->parent->dim.z, out->dim.x, out->dim.y, out->dim.z, HFL_O2, workspace1, workspace2 ); } /* Recursive call to Oabovenew for children. */ if (in->child) Oabovenew(out->child,in->child,sign, forwardfilter,reversefilter, fullfilter, workgrid->child,workspace1,workspace2); /* Recursive call to Oabovenew for siblings.*/ if (in->sibling) Oabovenew(out->sibling,in->sibling,sign, forwardfilter,reversefilter, fullfilter, workgrid->sibling,workspace1, workspace2); } /* self: input grid. Be very careful, we assume that the input grid is zero on its ghostpoints. workspace: memory block where some intermediate work is done */ void Osame(struct Grid *self, double *workspace1, double *workspace2) { /*Take Osame on self. Need to make it possible to distinguish between periodic boundary conditions and non periodic b.c.*/ conv3d(self->dat,O1l_r,O1l_f,O1l_s, /* added O1l_r and 01L_s */ self->dim.x, self->dim.y, self->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_O1); /*Need 0 on ghostpoints*/ if (self->level > 0) zeroghost(self); if (self->sibling) Osame(self->sibling,workspace1, workspace2); if (self->child) Osame(self->child,workspace1, workspace2); } /*Calculates the contribution to O from grids on the same level. out: output grid self: input grid, will not be changed sign: tells whether calculation is cumulative or not forwardfilter: workspace for filters in use workspace: workspace for the convolutions */ void Osamenew(struct Grid *out, struct Grid *self, int sign, double forwardfilter[], double reversefilter[], double fullfilter[], double *workspace1, double *workspace2) { int j; /* Debugging check the input vector */ /* if(self->level==0){ printf("print input at beg. of Osameenew: %le\n", self->dat[43583]); printf("print output at beg. of Osamenew: %le\n", out->dat[43583]); } */ for (j=0;j<=HFL_O1;j++) { reversefilter[0*(1+HFL_O1)+j] = (O1l_r[0*(1+HFL_O1)+j])*self->scale.x; forwardfilter[0*(1+HFL_O1)+j] = (O1l_f[0*(1+HFL_O1)+j]); fullfilter[0*(1+HFL_O1)+j] = (O1l_s[0*(1+HFL_O1)+j])*self->scale.x; reversefilter[1*(1+HFL_O1)+j] = (O1l_r[1*(1+HFL_O1)+j])*self->scale.y; forwardfilter[1*(1+HFL_O1)+j] = (O1l_f[1*(1+HFL_O1)+j]); fullfilter[1*(1+HFL_O1)+j] = (O1l_s[1*(1+HFL_O1)+j])*self->scale.y; reversefilter[2*(1+HFL_O1)+j] = (O1l_r[2*(1+HFL_O1)+j])*self->scale.z; forwardfilter[2*(1+HFL_O1)+j] = (O1l_f[2*(1+HFL_O1)+j]); fullfilter[2*(1+HFL_O1)+j] = (O1l_s[2*(1+HFL_O1)+j])*self->scale.z; } /*Take Osame on self. Need to make it possible to distinguish between periodic boundary conditions and non periodic b.c.*/ /* IPD: now the grids know themselves of the boundaryconditions */ conv3dnew(self->dat, out->dat, sign, self->ifperiodic, reversefilter,forwardfilter,fullfilter, self->dim.x, self->dim.y, self->dim.z, HFL_O1, workspace1, workspace2); // if (self->parent) /*non periodic convolution */ // conv3dnew(self->dat, // out->dat, // sign, // non_periodic, // reversefilter,forwardfilter,fullfilter, // self->dim.x, // self->dim.y, // self->dim.z, // HFL_O1, workspace1, workspace2); // else /*periodic convolution */ // conv3dnew(self->dat, // out->dat, // sign, // do_periodic, // reversefilter,forwardfilter,fullfilter, // self->dim.x, // self->dim.y, // self->dim.z, // HFL_O1, workspace1, workspace2); /* Debugging */ /* if(self->level==0){ printf("print input from Osamenew: %le\n", self->dat[43583]); printf("print output from Osamenew: %le\n", out->dat[43583]); } */ if (self->sibling) Osamenew(out->sibling,self->sibling,sign, forwardfilter,reversefilter, fullfilter, workspace1, workspace2); if (self->child) Osamenew(out->child,self->child,sign, forwardfilter,reversefilter, fullfilter, workspace1, workspace2); } void Obelowsimple( struct Grid *self, double *workspace1, double *workspace2, double forwardfilter[], double reversefilter[], double fullfilter[]) { int i,j,k; for (j=0;j<=HFL_O2;j++) { reversefilter[0*(1+HFL_O2)+j] = (O2l_r[0*(1+HFL_O2)+j])*self->scale.x; forwardfilter[0*(1+HFL_O2)+j] = (O2l_f[0*(1+HFL_O2)+j]); fullfilter[0*(1+HFL_O2)+j] = (O2l_s[0*(1+HFL_O2)+j])*self->scale.x; reversefilter[1*(1+HFL_O2)+j] = (O2l_r[1*(1+HFL_O2)+j])*self->scale.y; forwardfilter[1*(1+HFL_O2)+j] = (O2l_f[1*(1+HFL_O2)+j]); fullfilter[1*(1+HFL_O2)+j] = (O2l_s[1*(1+HFL_O2)+j])*self->scale.y; reversefilter[2*(1+HFL_O2)+j] = (O2l_r[2*(1+HFL_O2)+j])*self->scale.z; forwardfilter[2*(1+HFL_O2)+j] = (O2l_f[2*(1+HFL_O2)+j]); fullfilter[2*(1+HFL_O2)+j] = (O2l_s[2*(1+HFL_O2)+j])*self->scale.z; } /* O2lconvolution from self to self->parent */ if (self->level > 0) aPconv3d( self->parent->dat+ ( (self->org.x) * self->parent->dim.y + (self->org.y)) * self->parent->dim.z + (self->org.z), self->dat, 1, reversefilter, forwardfilter, fullfilter, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_O2, workspace1, workspace2 ); /*Remove all datapoints on self*/ for (i=0;i< ( (self->dim.x)*(self->dim.y)*(self->dim.z) );i++ ) self->dat[i]=0; /*Same process on siblings*/ if (self->sibling) Obelowsimple(self->sibling, workspace1, workspace2, forwardfilter, reversefilter, fullfilter); /*Same process on children*/ if (self->child) Obelowsimple(self->child,workspace1, workspace2, forwardfilter,reversefilter,fullfilter); /*This is a time consuming hack. Need to replace it with modified version of Idag very soon, but for now we keep it to see if we can get the numbers to work out correctly.*/ if (self->level>0) for (i=0; idim.x; i+=2) for (j=0; jdim.y; j+=2) for (k=0; kdim.z; k+=2) *( self->parent->dat + ( (self->org.x+i/2) * self->parent->dim.y + (self->org.y+j/2) ) * self->parent->dim.z + (self->org.z+k/2) ) += *( self->dat+(i*self->dim.y+j)*self->dim.z+k ); /* Idag convolution from self to self->parent This part of the function ensures that we get the contribution to O from ALL levels below, not just the level right below Need to make condition a little more rigorous than this*/ if ( (self->parent) && (self->child) ) aPconv3d( self->parent->dat+ ( (self->org.x) * self->parent->dim.y + (self->org.y)) * self->parent->dim.z + (self->org.z), self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, workspace1, workspace2 ); } /*Calculates the contribution to O from grids on all levels below. out: output grid self: input grid, will not be changed sign: tells whether calculation is cumulative or not forwardfilter: workspace for filters in use workspace: workspace for the convolutions */ void Obelownew( struct Grid *out, struct Grid *self, int sign, double forwardfilter[], double reversefilter[], double fullfilter[], double *workspace1, double *workspace2) { int i,j,k; /* Debugging check the input vector */ /* if(self->level==0){ printf("print input at beg. of Obelownew: %le\n", self->dat[43583]); printf("print output at beg. of Obelownew: %le\n", out->dat[43583]); } */ /*Same process on siblings*/ if (self->sibling) Obelownew(out->sibling,self->sibling,sign, forwardfilter,reversefilter,fullfilter, workspace1,workspace2); /*Same process on children*/ if (self->child) Obelownew(out->child,self->child,sign, forwardfilter,reversefilter,fullfilter, workspace1,workspace2); /*Determine overlap coefficients, including scaling factors*/ for (j=0;j<=HFL_O2;j++) { reversefilter[0*(1+HFL_O2)+j] = (O2l_r[0*(1+HFL_O2)+j])*self->scale.x; forwardfilter[0*(1+HFL_O2)+j] = (O2l_f[0*(1+HFL_O2)+j]); fullfilter[0*(1+HFL_O2)+j] = (O2l_s[0*(1+HFL_O2)+j])*self->scale.x; reversefilter[1*(1+HFL_O2)+j] = (O2l_r[1*(1+HFL_O2)+j])*self->scale.y; forwardfilter[1*(1+HFL_O2)+j] = (O2l_f[1*(1+HFL_O2)+j]); fullfilter[1*(1+HFL_O2)+j] = (O2l_s[1*(1+HFL_O2)+j])*self->scale.y; reversefilter[2*(1+HFL_O2)+j] = (O2l_r[2*(1+HFL_O2)+j])*self->scale.z; forwardfilter[2*(1+HFL_O2)+j] = (O2l_f[2*(1+HFL_O2)+j]); fullfilter[2*(1+HFL_O2)+j] = (O2l_s[2*(1+HFL_O2)+j])*self->scale.z; } if ( (sign == 0) && (self->child==NULL) ) /*Have to zero out every gridpoint on bottom level, will not be necessary in final version of Obelownew, need to change that eventually*/ for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++) out->dat[i]=0; if ( (self->level > 0) ) { if (sign == 0) if (self->sibling == NULL) /*Have to zero out every gridpoint on out->parent*/ for (i=0; i<(out->parent->dim.x*out->parent->dim.y*out->parent->dim.z); i++) out->parent->dat[i]=0; /* O2lconvolution from self to out->parent */ aPconv3d( out->parent->dat+ ( (self->org.x) * out->parent->dim.y + (self->org.y)) * out->parent->dim.z + (self->org.z), self->dat, 1, reversefilter, forwardfilter, fullfilter, out->parent->dim.x, out->parent->dim.y, out->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_O2, workspace1, workspace2 ); /*This is a hack which could be improved by doing the addition inside of the aPconv3d function call below, but it only costs about 2% in performance, so I will not modify the function to make that possible for now..*/ for (i=0; idim.x; i+=2) for (j=0; jdim.y; j+=2) for (k=0; kdim.z; k+=2) *( out->parent->dat + ( (out->org.x+i/2) * out->parent->dim.y + (out->org.y+j/2) ) * out->parent->dim.z + (out->org.z+k/2) ) += *( out->dat+(i*out->dim.y+j)*out->dim.z+k ); /* Idag convolution from out to out->parent This part of the function ensures that we get the contribution to O from ALL levels below, not just the level right below Need to make condition a little more rigorous than this*/ if (out->child) aPconv3d( out->parent->dat+ ( (out->org.x) * out->parent->dim.y + (out->org.y)) * out->parent->dim.z + (out->org.z), out->dat, 1, tr, tf, ts, out->parent->dim.x, out->parent->dim.y, out->parent->dim.z, out->dim.x, out->dim.y, out->dim.z, HFL_T, workspace1, workspace2 ); } } /*Multilevel overlap function, input - output in: the inputgridstructure, Is assumed to be zero on it ghostpoints. If is not, then run it once through zeroghostrec to ensure consistency workgrid: a grid onto which we place the results of some intermediate calculations workspace: additional workspace which is used inside the convolutions */ void O( struct Grid *out, struct Grid *in, double O1lf[], double O1lr[], double O1lfull[], double O2lf[], double O2lr[], double O2lfull[], struct Grid *workgrid, double *workspace1, double *workspace2) { /* Use a buffer grid, so that O(in, in) would work properly */ /* struct Grid *Tgrid; */ /*Uncomment the line below if not zero on ghostpoints*/ /* zeroghostrec(in); */ if ( (HFL_T>HFL_O2) || (HFL_O1>HFL_O2) || (HFL_T>HFL_O1) ) { printf("Warning: our code assumes HFL_O2 > HFL_O1 > HFL_T\n"); printf("while you have entered the lengths %d %d %d\n\n", HFL_O2,HFL_O1,HFL_T); printf("This creates problems with the size of workspace\n"); printf("so you must change the parameters determining the workspace.\n"); exit(1); } /* #ifdef newgridtype Tgrid=mkgridfromgridnew(out,NULL); #else Tgrid=mkgridfromgrid(out,NULL); #endif */ /*Find contributions to O from grid at all levels below*/ Obelownew(out,in,0,O2lf,O2lr,O2lfull,workspace1,workspace2); /* Obelownew(Tgrid,in,0,O2lf,O2lr,O2lfull,workspace1,workspace2); */ /*Find contributions to O from levels above*/ Oabovenew(out,in,1,O2lf,O2lr,O2lfull,workgrid,workspace1,workspace2); /* Oabovenew(Tgrid,in,1,O2lf,O2lr,O2lfull,workgrid,workspace1,workspace2); */ /*Find contributions to O from same level*/ Osamenew(out,in,1,O1lf,O1lr,O2lfull,workspace1,workspace2); /* Osamenew(Tgrid,in,1,O1lf,O1lr,O2lfull,workspace1,workspace2); */ /* #ifdef newgridtype copygridnew(out, Tgrid); killgridnew(Tgrid); #else copygrid(out, Tgrid); killgrid(Tgrid); #endif */ /*Make sure the function returns a grid with zeros on ghostpoints*/ zeroghostrec(out); /* what if we try the simple versions?? - does it inplace - not too bad; Obelowsimple(in,workspace1,workspace2,0,O2lf,O2lr,O2lfull); Oabovesimple(in,workspace1,workspace2,O2lf,O2lr,O2lfull); Osame(in, workspace1, workspace2) */ } /****************************************************************/ /* Routines for L */ /* Calculates the contribution to L from all levels above current level. in: the input grid of this calculation workgrid: grid onto which we calculate I(in) out: output grid workspace: additional workspace which is used inside the convolutions forwardfilter: workspace for actual filters to be used in the convolution, different from constant filter because it also includes numerical prefactor */ void Labovenew( struct Grid *out, struct Grid *in, int sign, double forwardfilter[], double reversefilter[], double fullfilter[], struct Grid *workgrid, double *workspace1, double *workspace2) { int i,j; /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (in==NULL) return; /*Make sure output on toplevel is zero if we are not doing a cumulative convolution*/ if ( (sign==0) && (in->level == 0) ) for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++) out->dat[i]=0; if ( ( in->level > 1 ) && (in->child) ) /* Take I from workgrid->parent onto workgrid. We also add the values of in onto the target workgrid, to ensure that we actually take I of in onto workgrid without changing in itself*/ aconvP3dadd( workgrid->parent->dat+ (workgrid->org.x * in->parent->dim.y + workgrid->org.y) * in->parent->dim.z + workgrid->org.z, workgrid->dat, in->dat, tr, tf, ts, workgrid->parent->dim.x, workgrid->parent->dim.y, workgrid->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_T, workspace1,workspace2 ); else if ( ( in->level == 1 ) && (in->child) ) /* Take I from in->parent onto workgrid */ aconvP3dadd( in->parent->dat+ (workgrid->org.x * in->parent->dim.y + workgrid->org.y) * in->parent->dim.z + workgrid->org.z, workgrid->dat, in->dat, tr, tf, ts, in->parent->dim.x, in->parent->dim.y, in->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_T, workspace1,workspace2 ); /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L2;j++) { reversefilter[0*(1+HFL_L2)+j]=(L2l_r[j])/in->scale.x; forwardfilter[0*(1+HFL_L2)+j]=(L2l_f[j]); fullfilter[0*(1+HFL_L2)+j]=(L2l_s[j])/in->scale.x; reversefilter[1*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.y; forwardfilter[1*(1+HFL_L2)+j]=(O2l_f[j]); fullfilter[1*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.y; reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.z; forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j]); fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.z; /* reversefilter[0*(1+HFL_L2)+j]=(L2l_r[j])*(pow(sqrt(2),in->level)); */ /* forwardfilter[0*(1+HFL_L2)+j]=(L2l_f[j])*(pow(sqrt(2),in->level)); */ /* fullfilter[0*(1+HFL_L2)+j]=(L2l_s[j])*(pow(2,in->level)); */ /* reversefilter[1*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */ /* forwardfilter[1*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */ /* fullfilter[1*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */ /* reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */ /* forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */ /* fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */ } if (workgrid->level > 1) /*L2level in x-direction from workgrid->parent to out*/ aconvP3d( workgrid->parent->dat+ +(workgrid->org.x * workgrid->parent->dim.y + workgrid->org.y) * workgrid->parent->dim.z + workgrid->org.z, out->dat, sign, reversefilter, forwardfilter, fullfilter, workgrid->parent->dim.x, workgrid->parent->dim.y, workgrid->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_L2, workspace1,workspace2 ); else if (workgrid->level == 1) /*L2level in x-direction directly from in->parent to out*/ aconvP3d( in->parent->dat+ +(out->org.x * in->parent->dim.y + out->org.y) * in->parent->dim.z + out->org.z, out->dat, sign, reversefilter, forwardfilter, fullfilter, in->parent->dim.x, in->parent->dim.y, in->parent->dim.z, out->dim.x, out->dim.y, out->dim.z, HFL_L2, workspace1, workspace2 ); /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L2;j++) { reversefilter[0*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.x; forwardfilter[0*(1+HFL_L2)+j]=(O2l_f[j]); fullfilter[0*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.x; reversefilter[1*(1+HFL_L2)+j]=(L2l_r[j])/in->scale.y; forwardfilter[1*(1+HFL_L2)+j]=(L2l_f[j]); fullfilter[1*(1+HFL_L2)+j]=(L2l_s[j])/in->scale.y; reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.z; forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j]); fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.z; /* reversefilter[0*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */ /* forwardfilter[0*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */ /* fullfilter[0*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */ /* reversefilter[1*(1+HFL_L2)+j]=(L2l_r[j])*(pow(sqrt(2),in->level)); */ /* forwardfilter[1*(1+HFL_L2)+j]=(L2l_f[j])*(pow(sqrt(2),in->level)); */ /* fullfilter[1*(1+HFL_L2)+j]=(L2l_s[j])*(pow(2,in->level)); */ /* reversefilter[2*(1+HFL_L2)+j]=(O2l_r[j])/(pow(sqrt(2),in->level)); */ /* forwardfilter[2*(1+HFL_L2)+j]=(O2l_f[j])/(pow(sqrt(2),in->level)); */ /* fullfilter[2*(1+HFL_L2)+j]=(O2l_s[j])/(pow(2,in->level)); */ } if (workgrid->level > 1) /*L2level in y-direction from workgrid->parent to out*/ aconvP3d( workgrid->parent->dat+ +(workgrid->org.x * workgrid->parent->dim.y + workgrid->org.y) * workgrid->parent->dim.z + workgrid->org.z, out->dat, 1, reversefilter, forwardfilter, fullfilter, workgrid->parent->dim.x, workgrid->parent->dim.y, workgrid->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_L2, workspace1,workspace2 ); else if (workgrid->level == 1) /*L2level in y-direction directly from in->parent to out*/ aconvP3d( in->parent->dat+ +(out->org.x * in->parent->dim.y + out->org.y) * in->parent->dim.z + out->org.z, out->dat, 1, reversefilter, forwardfilter, fullfilter, in->parent->dim.x, in->parent->dim.y, in->parent->dim.z, out->dim.x, out->dim.y, out->dim.z, HFL_L2, workspace1, workspace2 ); /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L2;j++) { reversefilter[0*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.x; forwardfilter[0*(1+HFL_L2)+j]=(O2l_f[j]); fullfilter[0*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.x; reversefilter[1*(1+HFL_L2)+j]=(O2l_r[j])*in->scale.y; forwardfilter[1*(1+HFL_L2)+j]=(O2l_f[j]); fullfilter[1*(1+HFL_L2)+j]=(O2l_s[j])*in->scale.y; reversefilter[2*(1+HFL_L2)+j]=(L2l_r[j])/in->scale.z; forwardfilter[2*(1+HFL_L2)+j]=(L2l_f[j]); fullfilter[2*(1+HFL_L2)+j]=(L2l_s[j])/in->scale.z; } if (workgrid->level > 1) /*L2level in z-direction from workgrid->parent to out*/ aconvP3d( workgrid->parent->dat+ +(workgrid->org.x * workgrid->parent->dim.y + workgrid->org.y) * workgrid->parent->dim.z + workgrid->org.z, out->dat, 1, reversefilter, forwardfilter, fullfilter, workgrid->parent->dim.x, workgrid->parent->dim.y, workgrid->parent->dim.z, workgrid->dim.x, workgrid->dim.y, workgrid->dim.z, HFL_L2, workspace1, workspace2 ); else if (workgrid->level == 1) /*L2level in z-direction directly from in->parent to out*/ aconvP3d( in->parent->dat+ +(out->org.x * in->parent->dim.y + out->org.y) * in->parent->dim.z + out->org.z, out->dat, 1, reversefilter, forwardfilter, fullfilter, in->parent->dim.x, in->parent->dim.y, in->parent->dim.z, out->dim.x, out->dim.y, out->dim.z, HFL_L2, workspace1, workspace2 ); /* Recursive call to Labovenew for children. */ if (in->child) Labovenew(out->child,in->child,sign, forwardfilter, reversefilter, fullfilter, workgrid->child, workspace1, workspace2 ); /* Recursive call to Labovenew for siblings.*/ if (in->sibling) Labovenew(out->sibling,in->sibling,sign, forwardfilter,reversefilter, fullfilter, workgrid->sibling,workspace1, workspace2 ); } /*Calculates the contribution to L from grids on the same level. out: output grid self: input grid, will not be changed sign: tells whether calculation is cumulative or not forwardfilter: workspace for filters in use workspace: workspace for the convolutions */ void Lsamenew(struct Grid *out, struct Grid *self, int sign, double forwardfilter[], double reversefilter[], double fullfilter[], double *workspace1, double *workspace2) { int j; /*Determine appropriate filter for convolution in x-direction (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L1;j++) { reversefilter[0*(1+HFL_L1)+j]=(L1l_r[j])/self->scale.x; forwardfilter[0*(1+HFL_L1)+j]=(L1l_f[j]); fullfilter[0*(1+HFL_L1)+j]=(L1l_s[j])/self->scale.x;; reversefilter[1*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.y; forwardfilter[1*(1+HFL_L1)+j]=(O1l_f[j]); fullfilter[1*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.y; reversefilter[2*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.z; forwardfilter[2*(1+HFL_L1)+j]=(O1l_f[j]); fullfilter[2*(1+HFL_L1)+j]=(O1l_f[j])*self->scale.z; } /*Take Lsame on self. Need to make it possible to distinguish between periodic boundary conditions and non periodic b.c.*/ /* IPD: Now the grids know about the boundary conditions */ /* Do convolution, dy dz / dx */ conv3dnew(self->dat, out->dat, sign, self->ifperiodic, reversefilter,forwardfilter, fullfilter, self->dim.x, self->dim.y, self->dim.z, HFL_L1, workspace1, workspace2); /*Determine appropriate filter for convolution in y-direction (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L1;j++) { reversefilter[0*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.x; forwardfilter[0*(1+HFL_L1)+j]=(O1l_f[j]); fullfilter[0*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.x; reversefilter[1*(1+HFL_L1)+j]=(L1l_r[j])/self->scale.y; forwardfilter[1*(1+HFL_L1)+j]=(L1l_f[j]); fullfilter[1*(1+HFL_L1)+j]=(L1l_s[j])/self->scale.y; reversefilter[2*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.z; forwardfilter[2*(1+HFL_L1)+j]=(O1l_f[j]); fullfilter[2*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.z; } /* Do convolution,dx dz / dy */ conv3dnew(self->dat, out->dat, 1, self->ifperiodic, reversefilter,forwardfilter, fullfilter, self->dim.x, self->dim.y, self->dim.z, HFL_L1, workspace1, workspace2); /*Determine appropriate filter for convolution in z-direction (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L1;j++) { reversefilter[0*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.x; forwardfilter[0*(1+HFL_L1)+j]=(O1l_f[j]); fullfilter[0*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.x; reversefilter[1*(1+HFL_L1)+j]=(O1l_r[j])*self->scale.y; forwardfilter[1*(1+HFL_L1)+j]=(O1l_f[j]); fullfilter[1*(1+HFL_L1)+j]=(O1l_s[j])*self->scale.y; reversefilter[2*(1+HFL_L1)+j]=(L1l_r[j])/self->scale.z; forwardfilter[2*(1+HFL_L1)+j]=(L1l_f[j]); fullfilter[2*(1+HFL_L1)+j]=(L1l_s[j])/self->scale.z; } /* Do convolution,dx dy /dz */ conv3dnew(self->dat, out->dat, 1, self->ifperiodic, reversefilter,forwardfilter, fullfilter, self->dim.x, self->dim.y, self->dim.z, HFL_L1, workspace1, workspace2); if (self->sibling) Lsamenew(out->sibling,self->sibling,sign, forwardfilter,reversefilter,fullfilter, workspace1, workspace2); if (self->child) Lsamenew(out->child,self->child,sign, forwardfilter,reversefilter, fullfilter, workspace1, workspace2); } /*Calculates the contribution to L from grids on all levels below. out: output grid self: input grid, will not be changed sign: tells whether calculation is cumulative or not forwardfilter: workspace for filters in use workspace: workspace for the convolutions */ void Lbelownew( struct Grid *out, struct Grid *self, int sign, double forwardfilter[], double reversefilter[], double fullfilter[], double *workspace1, double *workspace2) { int i,j,k; /*Same process on siblings*/ if (self->sibling) Lbelownew(out->sibling,self->sibling,sign, forwardfilter,reversefilter,fullfilter, workspace1,workspace2); /*Same process on children*/ if (self->child) Lbelownew(out->child,self->child,sign, forwardfilter,reversefilter,fullfilter, workspace1,workspace2); /*Have to zero out every gridpoint on bottom level, will not be necessary in final version of Obelow, need to change that eventually*/ if ( (sign == 0) && (self->child==NULL) ) for (i=0;i<(out->dim.x*out->dim.y*out->dim.z);i++) out->dat[i]=0; if ( (self->level > 0) ) { /*First do L in x-direction */ /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L2;j++) { reversefilter[0*(1+HFL_L2)+j] = (L2l_r[j])/self->scale.x; forwardfilter[0*(1+HFL_L2)+j] = (L2l_f[j]); fullfilter[0*(1+HFL_L2)+j] = (L2l_s[j])/self->scale.x; reversefilter[1*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.y; forwardfilter[1*(1+HFL_L2)+j] = (O2l_f[j]); fullfilter[1*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.y; reversefilter[2*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.z; forwardfilter[2*(1+HFL_L2)+j] = (O2l_f[j]); fullfilter[2*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.z; } if (sign == 0) if (self->sibling == NULL) /*Have to zero out every gridpoint on out->parent*/ for (i=0; i<(out->parent->dim.x*out->parent->dim.y*out->parent->dim.z); i++) out->parent->dat[i]=0; /* L2lconvolution from self to out->parent */ aPconv3d( out->parent->dat+ ( (self->org.x) * out->parent->dim.y + (self->org.y)) * out->parent->dim.z + (self->org.z), self->dat, 1, reversefilter, forwardfilter, fullfilter, out->parent->dim.x, out->parent->dim.y, out->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_L2, workspace1, workspace2 ); /*Next do L in y-direction */ /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L2;j++) { reversefilter[0*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.x; forwardfilter[0*(1+HFL_L2)+j] = (O2l_f[j]); fullfilter[0*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.x; reversefilter[1*(1+HFL_L2)+j] = (L2l_r[j])/self->scale.y; forwardfilter[1*(1+HFL_L2)+j] = (L2l_f[j]); fullfilter[1*(1+HFL_L2)+j] = (L2l_s[j])/self->scale.y; reversefilter[2*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.z; forwardfilter[2*(1+HFL_L2)+j] = (O2l_f[j]); fullfilter[2*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.z; } /* L2lconvolution from self to out->parent */ aPconv3d( out->parent->dat+ ( (self->org.x) * out->parent->dim.y + (self->org.y)) * out->parent->dim.z + (self->org.z), self->dat, 1, reversefilter, forwardfilter, fullfilter, out->parent->dim.x, out->parent->dim.y, out->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_L2, workspace1, workspace2 ); /*Finally do L in z-direction */ /*Determine appropriate filter (including factors which take into consideration the scale we are operating at) */ for (j=0;j<=HFL_L2;j++) { reversefilter[0*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.x; forwardfilter[0*(1+HFL_L2)+j] = (O2l_f[j]); fullfilter[0*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.x; reversefilter[1*(1+HFL_L2)+j] = (O2l_r[j])*self->scale.y; forwardfilter[1*(1+HFL_L2)+j] = (O2l_f[j]); fullfilter[1*(1+HFL_L2)+j] = (O2l_s[j])*self->scale.y; reversefilter[2*(1+HFL_L2)+j] = (L2l_r[j])/self->scale.z; forwardfilter[2*(1+HFL_L2)+j] = (L2l_f[j]); fullfilter[2*(1+HFL_L2)+j] = (L2l_s[j])/self->scale.z; } /* L2lconvolution from self to out->parent */ aPconv3d( out->parent->dat+ ( (self->org.x) * out->parent->dim.y + (self->org.y)) * out->parent->dim.z + (self->org.z), self->dat, 1, reversefilter, forwardfilter, fullfilter, out->parent->dim.x, out->parent->dim.y, out->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_L2, workspace1, workspace2 ); /*This is a hack which could be improved by doing the addition inside of the aPconv3d function call below, but it only costs about 2% in performance, so I will not modify the function to make that possible for now..*/ for (i=0; idim.x; i+=2) for (j=0; jdim.y; j+=2) for (k=0; kdim.z; k+=2) *( out->parent->dat + ( (out->org.x+i/2) * out->parent->dim.y + (out->org.y+j/2) ) * out->parent->dim.z + (out->org.z+k/2) ) += *( out->dat+(i*out->dim.y+j)*out->dim.z+k ); /* Idag convolution from out to out->parent This part of the function ensures that we get the contribution to O from ALL levels below, not just the level right below Need to make condition a little more rigorous than this*/ if (out->child) aPconv3d( out->parent->dat+ ( (out->org.x) * out->parent->dim.y + (out->org.y)) * out->parent->dim.z + (out->org.z), out->dat, 1, tr, tf, ts, out->parent->dim.x, out->parent->dim.y, out->parent->dim.z, out->dim.x, out->dim.y, out->dim.z, HFL_T, workspace1, workspace2 ); } } /*Multilevel Laplacian function (in place, recursive) input: the input and output grid of this calculation workgrid: a grid onto which we place the results of some intermediate calculations workspace: additional workspace which is used inside the convolutions */ void L( struct Grid *out, struct Grid *in, double L1lf[], double L1lr[], double L1lfull[], double L2lf[], double L2lr[], double L2lfull[], struct Grid *workgrid,double *workspace1, double *workspace2) { /* use a buffer grid, so that L(in, in) works properly as well */ /* struct Grid *Tgrid; */ if ( (HFL_T>HFL_O2) || (HFL_O1>HFL_O2) || (HFL_T>HFL_O1) ) { printf("Warning: our code assumes HFL_O2 > HFL_O1 > HFL_T\n"); printf("while you have entered the lengths %d %d %d\n\n", HFL_O2,HFL_O1,HFL_T); printf("This creates problems with the size of workspace\n"); printf("so you must change the parameters determining the workspace.\n"); exit(1); } /* We'll have to figure out where to put this, because it can't stay global. */ /* no_of_times_L_called+=1; */ /* #ifdef newgridtype Tgrid=mkgridfromgridnew(out, NULL); #else Tgrid=mkgridfromgrid(out, NULL); #endif */ /*Find contributions to L from grid at all levels below*/ Lbelownew(out,in,0,L2lf,L2lr,L2lfull,workspace1,workspace2); /* Lbelownew(Tgrid,in,0,L2lf,L2lr,L2lfull,workspace1,workspace2); */ /*Find contributions to L from levels above*/ Labovenew(out,in,1,L2lf,L2lr,L2lfull,workgrid,workspace1,workspace2); /* Labovenew(Tgrid,in,1,L2lf,L2lr,L2lfull,workgrid,workspace1,workspace2); */ /*Find contributions to L from same level*/ Lsamenew(out,in,1,L1lf,L1lr,L1lfull,workspace1,workspace2); /* Lsamenew(Tgrid,in,1,L1lf,L1lr,L1lfull,workspace1,workspace2); */ /* #ifdef newgridtype copygridnew(out, Tgrid); killgridnew(Tgrid); #else copygrid(out, Tgrid); killgrid(Tgrid); #endif */ zeroghostrec(out); }