/* DFT++ is a density functional package developed by the research group of Professor Tomas Arias Copyright 1996-2003 Sohrab Ismail-Beigi This file is part of DFT++. DFT++ is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. DFT++ is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DFT++; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please see the file CREDITS for a list of authors. For academic users, we request that publications using results obtained with this software reference "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000). and, if using the wavelet basis, further reference "Multiresolution analysis of electronic structure: semicardinal and wavelet bases," T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999). and "Robust ab initio calculation of condensed matter: transparent convergence through semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003). For your convenience, preprints of the above articles may be obtained from http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively. */ /****************************************************************/ /* Routines for D */ /* D(i) is the basic operation of "EVALUATING" the derivative of a function in the ith direction on all points of the realspace grid given the function's coefficient space representation. Ddag(i) is it's Hermitian conjugate in the sense of identification of real space points with their ghosts. */ #include #include #include "WL_header.h" /* 3d array addressing short-hand */ #define DAT(a,x,y,z,Nx,Ny,Nz) ( (a)->dat[((x)*Ny+(y))*Nz+(z)] ) void Dacc(struct Grid *self) { /* Indices */ int x,y,z; /* Go to bottom first */ if (self->child) Dacc(self->child); /* Do all siblings BEFORE coming up levels */ if (self->sibling) Dacc(self->sibling); /* Accumulate ghost data up to parent (if there is one!) */ if (self->parent) { /* Convenient navigational parameters */ int Nx=self->dim.x,Ny=self->dim.y,Nz=self->dim.z; int Npx=self->parent->dim.x,Npy=self->parent->dim.y,Npz=self->parent->dim.z; for (x=0; xdim.x/2; x++) for (y=0; ydim.y/2; y++) for (z=0; zdim.z/2; z++) self-> parent-> dat[((self->org.x+x)*Npy+(self->org.y+y))*Npz+(self->org.z+z) ] += self->dat[(2*x*Ny+2*y)*Nz+2*z]; } } /* Basic convolution kernel Convolves multiple data sets (points, lines, or planes). Output stored contiguously as a three dimensional object; navigation through input accomplished through various strides. output: pointer to start of output array input: pointer to element in input array which corresponds to origin of output array periodic: tells whether input is to be treated as periodic (no/yes=0/1) sign: 0 (write into output), -1 (subtract from output), 1 (add to output) c: convolution coefficients for 0, 1, 2, ... hfl: half-filter length, extent of filter on either side beyond zero symm: reflection symmetry of filter, +/-1 N: number of objects to be produced in output size: size of objects (points, lines, planes) convolved together stride_obj: stride between objects in input convs: number of convolutions to perform stride_conv: stride between starting convolution locations in output Notes: For convolutions along z we have the exploitable special case of stride_obj=1, size=1. In general, we have for convolutions along direction i N=N_i size=prod_{j>i} N_j stride_obj=prod_{j>i} N_j convs=prod_{j=i} N_j */ void convolve(double *output,double *input,int periodic,int sign, double *c,int hfl,int symm, int N,int size,int stride_obj,int convs,int stride_conv) { int ic,n,el; if (sign==1) { printf("sign=1 not supported in convolve()!!!\n"); exit(1); } if (sign==0) { /* Special case for optimization */ if (symm==-1) { switch (hfl) { case 2: for (ic=0; icdim; /* For getting appropriate filter scale */ double d1[1+HFL_D1],D; int sign,symm,N,size,stride_obj,convs,stride_conv; /* Make version of filter scaled to appropo direction */ switch (d) { case 0: D=1./in->scale.x; break; case 1: D=1./in->scale.y; break; case 2: D=1./in->scale.z; break; default: printf("Illegal direction d=%d in Dsame!\n",d); exit(1); } for (x=0; x<=HFL_D1; x++) d1[x]=D1[x]*D; switch (d) { case 2: /* Convolve box [0,0,0]->(Nx,Ny,Nz) with deriv along z */ convolve(out->dat,in->dat,in->ifperiodic,sign=0, d1,HFL_D1,symm=-1, N=dim.z,size=1,stride_obj=1, convs=dim.x*dim.y,stride_conv=dim.z); break; case 1: /* Convolve box [0,0,0]->(Nx,Ny,Nz) with deriv along y */ convolve(out->dat,in->dat,in->ifperiodic,sign=0, d1,HFL_D1,symm=-1, N=dim.y,size=dim.z,stride_obj=dim.z, convs=dim.x,stride_conv=dim.y*dim.z); break; case 0: /* Convolve box [0,0,0]->(Nx,Ny,Nz) with deriv along x */ convolve(out->dat,in->dat,in->ifperiodic,sign=0, d1,HFL_D1,symm=-1, N=dim.x,size=dim.y*dim.z,stride_obj=dim.y*dim.z, convs=1,stride_conv=dim.x*dim.y*dim.z); break; default: printf("Illegal direction d=%d in Dsame!\n"); exit(1); } /* Continue on to all subgrids */ if (in->child) Dsame(d,out->child,in->child); if (in->sibling) Dsame(d,out->sibling,in->sibling); /* This should have the same effect as calling Dacc after Dsame, but only want this version for Dabove and not Dbelow, so simpler coding to just call Dacc explicitly. Also, the explict call to Dacc doesn't seem to take much time (a few percent at most!). */ /* if (out->parent) { */ /* int Npx=out->parent->dim.x,Npy=out->parent->dim.y,Npz=out->parent->dim.z; */ /* for (x=0; x */ /* parent-> */ /* dat[((out->org.x+x)*Npy+(out->org.y+y))*Npz+(out->org.z+z) ] */ /* += out->dat[(2*x*dim.y+2*y)*dim.z+2*z]; */ /* } */ } /* Just like Dsame, but subtracts "decrements" result from out */ void decDsame(int d,struct Grid *out, struct Grid *in) { int x,y,z; /* Convenient aliases for important quantities */ struct Ivec dim=in->dim; /* For getting appropriate filter scale */ double d1[1+HFL_D1],D; int sign,symm,N,size,stride_obj,convs,stride_conv; /* Make version of filter scaled to appropo direction */ switch (d) { case 0: D=1./in->scale.x; break; case 1: D=1./in->scale.y; break; case 2: D=1./in->scale.z; break; default: printf("Illegal direction d=%d in Dsame!\n",d); exit(1); } for (x=0; x<=HFL_D1; x++) d1[x]=D1[x]*D; switch (d) { case 2: /* Convolve box [0,0,0]->(Nx,Ny,Nz) with deriv along z */ convolve(out->dat,in->dat,in->ifperiodic,sign=-1, d1,HFL_D1,symm=-1, N=dim.z,size=1,stride_obj=1, convs=dim.x*dim.y,stride_conv=dim.z); break; case 1: /* Convolve box [0,0,0]->(Nx,Ny,Nz) with deriv along y */ convolve(out->dat,in->dat,in->ifperiodic,sign=-1, d1,HFL_D1,symm=-1, N=dim.y,size=dim.z,stride_obj=dim.z, convs=dim.x,stride_conv=dim.y*dim.z); break; case 0: /* Convolve box [0,0,0]->(Nx,Ny,Nz) with deriv along x */ convolve(out->dat,in->dat,in->ifperiodic,sign=-1, d1,HFL_D1,symm=-1, N=dim.x,size=dim.y*dim.z,stride_obj=dim.y*dim.z, convs=1,stride_conv=dim.x*dim.y*dim.z); break; default: printf("Illegal direction d=%d in Dsame!\n"); exit(1); } /* Continue on to all subgrids */ if (in->child) decDsame(d,out->child,in->child); if (in->sibling) decDsame(d,out->sibling,in->sibling); /* This should have the same effect as calling Dacc after Dsame, but only want this version for Dabove and not Dbelow, so simpler coding to just call Dacc explicitly. Also, the explict call to Dacc doesn't seem to take much time (a few percent at most!). */ /* if (out->parent) { */ /* int Npx=out->parent->dim.x,Npy=out->parent->dim.y,Npz=out->parent->dim.z; */ /* for (x=0; x */ /* parent-> */ /* dat[((out->org.x+x)*Npy+(out->org.y+y))*Npz+(out->org.z+z) ] */ /* += out->dat[(2*x*dim.y+2*y)*dim.z+2*z]; */ /* } */ } /* Basic coarser-convolution kernel Coarser-convolves (creates output at twice the input density) multiple data sets (points, lines, or planes). Output stored contiguously as a three dimensional object; navigation through input accomplished through various strides. output: pointer to start of output array ifacc: tells whether to accumulate results onto output (true) or to overwrite it (false) input: pointer to element in input array which corresponds to origin of output array c: convolution coefficients for 0, 1, 2, ... hfl: half-filter length, extend of filter on either side beyond zero symm: reflection symmetry of filter, +/-1 N: number of objects to be produced in output size: size of objects (points, lines, planes) convolved together stride_obj: stride between objects in input convs: number of convolutions to perform stride_conv: stride between starting convolution locations in input Notes: For convolutions along z we have the exploitable special case of stride_obj=1, size=1. In general, we have for convolutions along direction i N=Nout_i size=prod_{j>i} Nout_j stride_obj=prod_{j>i} Nin_j convs=prod_{j=i} Nin_j */ void expand(double *output,int ifacc,double *input,double *c,int hfl,int symm, int N,int size,int stride_obj,int convs,int stride_conv) { int ic,n2,n,el; switch (hfl) { case 5: if (ifacc) for (ic=0; ici} Nin_j stride_obj=prod_{j>i} Nout_j convs=prod_{j=i} Nout_j */ void contract(double *output,double *input,double *c,int hfl,int symm, int N,int size,int stride_obj,int convs,int stride_conv) { int ic,n2,el; register double tmp; switch (hfl) { case 5: for (ic=0; icsibling) Idag_flow(self->sibling, workspace1, workspace2, lwork); /*Same process on children*/ if (self->child) Idag_flow(self->child, workspace1, workspace2, lwork); /* Here is the accumulating upward "flow" of data */ if (self->level>0) for (i=0; idim.x; i+=2) for (j=0; jdim.y; j+=2) for (k=0; kdim.z; k+=2) *( self->parent->dat + ( (self->org.x+i/2) * self->parent->dim.y + (self->org.y+j/2) ) * self->parent->dim.z + (self->org.z+k/2) ) += *( self->dat+(i*self->dim.y+j)*self->dim.z+k ); /* Interlevel convolution with full set of Idag coefficients. */ if ( (self->parent) && (self->child) ) { if ( lwork < (self->dim.x+3*HFL_T)*(self->dim.y+3*HFL_T) *(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Idag_flow for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } aPconv3d( self->parent->dat+ ( (self->org.x) * self->parent->dim.y + (self->org.y)) * self->parent->dim.z + (self->org.z), self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, workspace1, workspace2 ); } } #define IND(x,y,z,Nx,Ny,Nz) ( ((x)*(Ny)+(y))*(Nz)+(z) ) void Dabove(int d, struct Grid *self,double *work1,double *work2,int lwork) { /* Indices */ int x,y,z; int Nx=self->dim.x,Ny=self->dim.y,Nz=self->dim.z; /* Sanity check on derivative direction */ if (d<-1 || d>2) { printf("Illegal direction d=%d in Dabove!!!\n",d); exit(1); } /* Check on workspace */ if ( self->level > 0 ) { if ( lwork < (self->dim.x+3*HFL_T)*(self->dim.y+3*HFL_T) *(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Dabove for I..."); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } if ( lwork < (self->dim.x/2+5)*self->dim.y*self->dim.z ) { fprintf(stderr,"Insufficient work space in Dabove..."); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } } /* This has same effect as taking I explicitly just before calling Dabove */ if ( self->level > 0 ) aconvP3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y,self->dim.z, HFL_T, work1, work2 ); /* Work from bottom up */ if (self->child) Dabove(d,self->child,work1,work2,lwork); /* Do siblings AFTER children to make inheritance streams for cache */ if (self->sibling) Dabove(d,self->sibling,work1,work2,lwork); /* Convolve parent onto self */ if (self->parent==NULL) /* Zero contribution if no parent */ for (x=0; xdat[x]=0.; else { struct Ivec dim=self->dim; struct Ivec org=self->org; struct Ivec pdim=self->parent->dim; struct Dvec pscale=self->parent->scale; double *pdat=self->parent->dat; int x,ifacc,N,size,stride_obj,convs,stride_conv; double *F; int hfl,sign; /* Appropriately scaled filter for taking derivatives */ double d2x[1+HFL_D2]; double d2y[1+HFL_D2]; double d2z[1+HFL_D2]; int i; for (i=0; i<=HFL_D2; i++) { d2x[i]=D2[i]/pscale.x; d2y[i]=D2[i]/pscale.y; d2z[i]=D2[i]/pscale.z; } /* Expand the box [org.x-2,org.y-2,org.z]->(org.x+Nx/2+3,org.y+Ny/2+3,org.z+Nz/2) along z to make array of size [Nx/2+5,Ny/2+5,Nz]. The extra space is needed for borders for upcoming x and y convolutions. */ /* For expansions along z, we loop over each plane as the stride between planes is not contiguous (and so can't be done in a single call to expand) due to expansion region not filling the full parent. AND, INTERLEAVE this with expanding along y to make array of size [Nx/2+5,Ny,Nz] with final optimization of noting that only one x plane of work1 is ever used and so we always use the first one. */ for (x=0; x(Nx/2+5,Ny,Nz) along z, making the final array of size [Nx,Ny,Nz] in the output data block */ if (d==0) {F=d2x; hfl=HFL_D2; sign=-1;} else {F=T2; hfl=HFL_T2; sign=1;} expand(self->dat,ifacc=0, &work2[IND(2,0,0,dim.x/2+5,dim.y,dim.z)], F,hfl,sign, N=dim.x,size=dim.y*dim.z,stride_obj=dim.y*dim.z, convs=1,stride_conv=(dim.x/2+5)*dim.y*dim.z); } } /* Accumlative version of Dabove. Identical arguments, except for -- out: grid onto which results are accumulated */ void accDabove(int d, struct Grid *out,struct Grid *self,double *work1,double *work2,int lwork) { /* Indices */ int x,y,z; int Nx=self->dim.x,Ny=self->dim.y,Nz=self->dim.z; /* Sanity check on derivative direction */ if (d<0 || d>2) { printf("Illegal direction d=%d in Dabove!!!\n",d); exit(1); } /* Check on workspace */ if (self->level > 0) { if ( lwork < (self->dim.x+3*HFL_T)*(self->dim.y+3*HFL_T) *(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in accDabove for I..."); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } if ( lwork < (self->dim.x/2+5)*self->dim.y*self->dim.z ) { fprintf(stderr,"Insufficient work space in accDabove..."); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } } /* This has same effect as taking I explicitly just before calling Dabove */ if ( self->level > 0 ) aconvP3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y,self->dim.z, HFL_T, work1, work2 ); /* Work from bottom up */ if (self->child) accDabove(d,out->child,self->child,work1,work2,lwork); /* Do siblings AFTER children to make inheritance streams for cache */ if (self->sibling) accDabove(d,out->sibling,self->sibling,work1,work2,lwork); /* Convolve parent onto self */ /* Zero contribution if no parent */ if (self->parent) { struct Ivec dim=self->dim; struct Ivec org=self->org; struct Ivec pdim=self->parent->dim; struct Dvec pscale=self->parent->scale; double *pdat=self->parent->dat; int x,ifacc,N,size,stride_obj,convs,stride_conv; double *F; int hfl,sign; /* Appropriately scaled filter for taking derivatives */ double d2x[1+HFL_D2]; double d2y[1+HFL_D2]; double d2z[1+HFL_D2]; int i; for (i=0; i<=HFL_D2; i++) { d2x[i]=D2[i]/pscale.x; d2y[i]=D2[i]/pscale.y; d2z[i]=D2[i]/pscale.z; } /* Expand the box [org.x-2,org.y-2,org.z]->(org.x+Nx/2+3,org.y+Ny/2+3,org.z+Nz/2) along z to make array of size [Nx/2+5,Ny/2+5,Nz]. The extra space is needed for borders for upcoming x and y convolutions. */ /* For expansions along z, we loop over each plane as the stride between planes is not contiguous (and so can't be done in a single call to expand) due to expansion region not filling the full parent. AND INTERLEAVE this with expanding along y to make array of size [Nx/2+5,Ny,Nz] with final optimization of noting that only one x plane of work1 is ever used and so we always use the first one. */ for (x=0; x(Nx/2+5,Ny,Nz) along z, making the final array of size [Nx,Ny,Nz] in the output data block */ if (d==0) {F=d2x; hfl=HFL_D2; sign=-1;} else {F=T2; hfl=HFL_T2; sign=1;} expand(out->dat,ifacc=1, &work2[IND(2,0,0,dim.x/2+5,dim.y,dim.z)], F,hfl,sign, N=dim.x,size=dim.y*dim.z,stride_obj=dim.y*dim.z, convs=1,stride_conv=(dim.x/2+5)*dim.y*dim.z); } } /* Dagger of Dabove in the sense of identification of ghost images of real space points, thus including prezeroing of ghost points before convolution */ void Dbelow(int d, struct Grid *out, struct Grid *in,double *work1,double *work2,int lwork) { /* Indices */ int x,y,z; int Nx=out->dim.x,Ny=out->dim.y,Nz=out->dim.z; /* Sanity check on derivative direction */ if (d<-1 || d>2) { printf("Illegal direction d=%d in Dabove!!!\n",d); exit(1); } /* Working from top down... */ /* Accumulate-convolve onto parent (if any) */ if (out->parent) { struct Ivec dim=out->dim; struct Ivec org=out->org; struct Ivec pdim=out->parent->dim; struct Dvec pscale=out->parent->scale; double *pdat=out->parent->dat; int x,N,size,stride_obj,convs,stride_conv; double *F; int hfl,sign; double d2x[1+HFL_D2]; double d2y[1+HFL_D2]; double d2z[1+HFL_D2]; int i; for (i=0; i<=HFL_D2; i++) { d2x[i]=D2[i]/pscale.x; d2y[i]=D2[i]/pscale.y; d2z[i]=D2[i]/pscale.z; } /* Check on workspace */ if ( lwork < (in->dim.x/2+5)*in->dim.y*in->dim.z) { fprintf(stderr,"Insufficient work space in Dbelow for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", in->dim.x,in->dim.y,in->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* Copy input and zero ghosts (which don't contrib to Dbelow) */ for (x=0; xdat[IND(x,y,z,dim.x,dim.y,dim.z)]=0.; else out->dat[IND(x,y,z,dim.x,dim.y,dim.z)]= in->dat[IND(x,y,z,dim.x,dim.y,dim.z)]; /* Contract the box [0,0,0]->(Nx,Ny,Nz) from out along x, making the final array of size [Nx/2+5,Ny,Nz] into work1. Note that the origin of the input box correpsonds to the point (2,0,0) in the output box because the effects of the lower level 'puff out' along x. NOTE: must zero work1 first because contract() is accumulative */ for (x=0; x<(dim.x/2+5)*dim.y*dim.z; x++) work1[x]=0.; if (d==0) {F=d2x; hfl=HFL_D2; sign=-1;} else {F=T2; hfl=HFL_T2; sign=1;} contract(&work1[IND(2,0,0,dim.x/2+5,dim.y,dim.z)], out->dat, F,hfl,sign, N=dim.x,size=dim.y*dim.z,stride_obj=dim.y*dim.z, convs=1,stride_conv=(dim.x/2+5)*dim.y*dim.z); /* Contract the box [0,0,0]->(Nx/2+5,Ny,Nz) along y to make array of size [Nx/2+5,Ny/2+5,Nz] in work2. Note that the origin of the input box corresponds to (0,2,0) in the output. NOTE: must zero work2 first because contract() is accumulative. AND INTERLEAVE this operation with contracting the box [0,0,0]->(Nx/2+5,Ny/2+5,Nz) along z and accumulate onto parent with origin of work2 corresponding to (org.x-2,org.y-2,org.z). The -2's are because of the buffer we've built into work2. The final optimization was to note that only one plane of work2 is ever used at once, so we just use the 0th one to reduce memory volume */ for (x=0; xdat[x]=0.; /* Do children */ if (out->child) Dbelow(d,out->child,in->child,work1,work2,lwork); /* Do siblings AFTER children to make inheritance streams for cache */ if (out->sibling) Dbelow(d,out->sibling,in->sibling,work1,work2,lwork); } /* Evaluate the "Dd" operator, the first derivative in dth direction in REAL SPACE given value of function in coefficient space --- d: direction of derivative out: output (in real space) in: input (in coeff space) work1: workspace work2: workspace lwork: size of workspaces */ void D(int d, struct Grid *out, struct Grid *in,struct Grid *work,double *work1,double *work2,int lwork) { /* Save copy of input. Needed so that D isn't destructive. If destructive were permissible, eliminating this would lead to a noticeable time savings. */ copygridnew(work,in); /* Contributions from levels below */ Dsame(d,out,in); Dacc(out); /* Add on contributions from levels above */ accDabove(d,out,work,work1,work2,lwork); return; } /* Hermitian conjugate of D, with same inputs. Routine derived by "daggering" (reversing data flow and daggering) the routine for D: */ void Ddag(int d, struct Grid *out, struct Grid *in,struct Grid *work,double *work1,double *work2,int lwork) { /* Find contributions from below current level */ Dbelow(d,out,in,work1,work2,lwork); Idag_flow(out,work1,work2,lwork); /* Because the first derivative filter is asymmetric, its dagger is just minus itself. Thus, decrement same convolution result from out. */ decDsame(d,out,in); /* Impose those zeros on ghosts */ zeroghostrec(out); }