/* DFT++ is a density functional package developed by the research group of Professor Tomas Arias Copyright 1996-2003 Sohrab Ismail-Beigi This file is part of DFT++. DFT++ is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. DFT++ is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DFT++; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please see the file CREDITS for a list of authors. For academic users, we request that publications using results obtained with this software reference "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000). and, if using the wavelet basis, further reference "Multiresolution analysis of electronic structure: semicardinal and wavelet bases," T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999). and "Robust ab initio calculation of condensed matter: transparent convergence through semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003). For your convenience, preprints of the above articles may be obtained from http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively. */ #include #include #include #include "WL_header.h" /*****************************************************************************/ /* Interpolet routines */ /*****************************************************************************/ /* Multilevel J (IN PLACE, RECURSIVE FUNCTION) self: pointer to grid to be transformed work: pointer to work space of size >= prod(dim+3*HFL) lwork: size of space pointed to by work (for checking purposes) */ void J(struct Grid *self,double *work,double *work_extra,int lwork) { /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (self==NULL) return; /* J works from the bottom up, so go to the children first; return immediately if there is nothing below. */ J(self->child, work, work_extra,lwork); /* Best caching in vertical segements, so do the work (HERE!) before going on to siblings */ /* For J "self" takes the role of the lower level. Thus J does no actual on the top level! */ if (self->level > 0){ /* Make sure there is enough work space */ if ( lwork < (self->dim.x+3*HFL_T) *(self->dim.y+3*HFL_T)*(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in J for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* Do it! */ aconvP3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, -1, tr, tf,ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, work, work_extra ); } /* Print statement(s) HERE AS WORK IS DONE for debugging */ /* { */ /* int level; */ /* printf("J: "); */ /* for (level=0; level< self->level; level++) */ /* printf(" "); */ /* printf("[%d]\n",self->level); */ /* } */ /* Now, go on to siblings; again return immediately if there is nothing below. */ J(self->sibling, work, work_extra, lwork); return; } /* Multilevel I (IN PLACE, RECURSIVE FUNCTION) self: pointer to grid to be transformed work: pointer to work space of size >= prod(dim+3*HFL) lwork: size of space pointed to by work (for checking purposes) */ void I(struct Grid *self,double *work,double *work_extra,int lwork) { /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (self==NULL) return; /* I works from the top down, so do work now, before the children */ /* For I "self" takes the role of the lower level. Thus I does no actual work on the top level! */ if (self->level > 0) { /* Make sure there is enough work space */ if ( lwork < (self->dim.x+3*HFL_T) *(self->dim.y+3*HFL_T)*(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in I "); fprintf(stderr,"for data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* Do it! */ aconvP3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 1, tr, tf,ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, work,work_extra ); } /* Print statement(s) HERE AS WORK IS DONE for debugging */ /* { */ /* int level; */ /* printf("I: "); */ /* for (level=0; level< self->level; level++) */ /* printf(" "); */ /* printf("[%d]\n",self->level); */ /* } */ /* Best caching in vertical segements, so do children FIRST, and THEN sibblings */ I(self->child, work, work_extra, lwork); I(self->sibling, work, work_extra, lwork); return; } /* Copy the top level of a periodic grid to expanded top level, taking into account the bloch phase of the wavefunctions we assume that the grid allocation is done outside kvec is in lattice cooordinates, therefore it has to be rescaled to physical coordinates before we use it */ void CopyBlochPad(struct Grid *outre, struct Grid *outim, struct Grid *inre, struct Grid *inim, struct Dvec kvec, struct Ivec pad) { /* here we can have some checks for structure consistency outre==outim, outdim=indim+2pad */ int i, j, k; /* output grid indices */ int ii, jj, kk; /* input grid indices */ struct Ivec outdim=outre->dim; struct Ivec indim=inre->dim; int datapos; double cosx, cosy, cosz, sinx, siny, sinz; double freal=1.0,fimag=0.0; /* phase factors when we copy the data */ double tmpre, tmpim; /* tmp variables to do COMPLEX*COMPLEX */ /* we used to have kvec in physical coordinates, now it's back to lattice */ /* cosx = cos(inre->dim.x*inre->scale.x * kvec.x); */ /* sinx = sin(inre->dim.x*inre->scale.x * kvec.x); */ /* cosy = cos(inre->dim.y*inre->scale.y * kvec.y); */ /* siny = sin(inre->dim.y*inre->scale.y * kvec.y); */ /* cosz = cos(inre->dim.z*inre->scale.z * kvec.z); */ /* sinz = sin(inre->dim.z*inre->scale.z * kvec.z); */ cosx = cos(2 * M_PI * kvec.x); sinx = sin(2 * M_PI * kvec.x); cosy = cos(2 * M_PI * kvec.y); siny = sin(2 * M_PI * kvec.y); cosz = cos( 2 * M_PI * kvec.z); sinz = sin( 2 * M_PI * kvec.z); /* now copy the data from "in" to "out", by makng sure the bloch's phase is accounted when in the padded region */ for (i=0; i=0 && ii=0 && jj=0 && kk=pad.x && i=pad.y && j=pad.z && kdat + (i*outdim.y +j)*outdim.z + k ) = *( inre->dat + (ii*inre->dim.y + jj)*indim.z + kk ); *( outim->dat + (i*outdim.y+j)*outdim.z + k ) = *( inim->dat + (ii*inim->dim.y + jj)*indim.z + kk ); } else { /* copy with a phase factor, this is the next cell */ freal=1.0; fimag=0.0; /* now determine the phase factors */ if(i=outdim.x - pad.x) { /* we are in the next cell: f*=e^{i*k*x} */ tmpre = cosx*freal - sinx*fimag; tmpim = cosx*fimag + sinx*freal; freal=tmpre; fimag=tmpim; /* take data from the original cell */ ii -= indim.x; } if(j=outdim.y - pad.y) { /* we are in the next cell: f*=e^{i*k*y} */ tmpre = cosy*freal - siny*fimag; tmpim = cosy*fimag + siny*freal; freal=tmpre; fimag=tmpim; /* take data from the original cell */ jj -= indim.y; } if(k=outdim.z - pad.z) { /* we are in the next cell: f*=e^{i*k*z} */ tmpre = cosz*freal - sinz*fimag; tmpim = cosz*fimag + sinz*freal; freal=tmpre; fimag=tmpim; /* take data from the original cell */ kk -= indim.z; } datapos = (ii*indim.y + jj)*indim.z + kk; /* copy the data with the appropriate phase factors */ *( outre->dat+ (i*outdim.y + j)*outdim.z + k )= freal*inre->dat[datapos] - fimag*inim->dat[datapos]; *( outim->dat+ (i*outdim.y + j)*outdim.z + k )= freal*inim->dat[datapos] + fimag*inre->dat[datapos]; } } } /* Copy Grid to one with padded regions - one level */ void Copy2Padded(struct Grid *out, struct Grid *in, struct Ivec pad) { /* here we can have some checks for structure consistency */ int i, j, k; /* output grid indices */ int ii, jj, kk; /* input grid indices */ struct Ivec outdim=out->dim; struct Ivec indim=in->dim; int inpos, outpos;/* location of the data */ /* now copy the data from "in" to "out", by makng sure the periodicity is accounted for when in the padded region */ for (i=0; iifperiodic)/* copy everything */ *( out->dat + outpos) = *( in->dat + inpos); else /* copy only in the inner region and zero the pad */ if(i>=pad.x && i=pad.y && j=pad.z && kdat + outpos) = *( in->dat + inpos); else *( out->dat + outpos) = 0.; } } } } /* periodic */ void CopyfromPadded(struct Grid *out, struct Grid *in, struct Ivec pad) { /* here we can have some checks for structure consistency */ int i, j, k; /* output grid indices */ int ii, jj, kk; /* input grid indices */ struct Ivec outdim=out->dim; struct Ivec indim=in->dim; int inpos, outpos;/* location of the data */ /* now copy the data from "in" to "out" */ for (i=0; idat + outpos) = *( in->dat + inpos); } } } } /* Copy padded grid on nonpadded - periodic If the output grid is periodic add the data from the pads to their coresponding places NOT VRFD */ void CopyfromPadded_p(struct Grid *out, struct Grid *in, struct Ivec pad) { /* here we can have some checks for structure consistency */ int i, j, k; /* input grid indices */ int ii, jj, kk; /* output grid indices */ struct Ivec outdim=out->dim; struct Ivec indim=in->dim; int inpos, outpos;/* location of the data */ /* first zero out the output */ for(i=0; inxyz; i++) in->dat[i]=0; /* now copy the data from "in" to "out", by makng sure the periodicity is accounted for when in the padded region */ for (i=0; iifperiodic)/* copy everything */ *( out->dat + outpos) += *( in->dat + inpos); else /* copy only in the inner region */ if(i>=pad.x && i=pad.y && j=pad.z && kdat + outpos) += *( in->dat + inpos); } } } } /*copies "in" and all of its siblings onto "out", which is twice denser grid than the parent of "in", i.e. it's scale is the same as in but "in"'s origin is displaced from the origin of "out " */ void copyfromsiblings(struct Grid *out, struct Grid *in) { int i, j, k, ii, jj, kk; if(in==NULL) return; for(i=0; idim.x; i++){ ii = i + 2*in->org.x; for(j=0; jdim.y; j++){ jj = j + 2*in->org.y; for(k=0; kdim.z; k++){ kk = k + 2*in->org.z; *(out->dat+ (ii*out->dim.y+jj)*out->dim.z +kk)= *(in->dat+ (i*in->dim.y+j)*in->dim.z +k); } } } /* now go to the next sibling */ copyfromsiblings(out, in->sibling); } /* Two level SPARSE->DENSE transformation It takes care for the Bloch's nature on the top level outre, outim: real and imaginary part of the dense grid inre, inim: real and imaginary part of the sparse grid kvec: reciprocal lattice vector, for the bloch's phase Works from top to bottom */ void I_S2D(struct Grid *outre, struct Grid *outim, struct Grid *inre, struct Grid *inim , struct Dvec kvec, double *workspace1, double *workspace2, int lwork) { /* the size of the padding */ struct Ivec pad={HFL_T, HFL_T, HFL_T}; struct Ivec outorg;/* temp storage */ /* create the expanded top levels (for now the same as the output grid) */ struct Grid topre; struct Grid topim; int i; if(outre==NULL) return; /* then change their dimensions */ if (outre->level==0){ /* first make the top level as the input top level */ topre=*inre; topim=*inim; /* now add the padding */ topre.dim.x+=2*pad.x; topre.dim.y+=2*pad.y; topre.dim.z+=2*pad.z; topim.dim.x+=2*pad.x; topim.dim.y+=2*pad.y; topim.dim.z+=2*pad.z; topre.nxyz=topre.dim.x*topre.dim.y*topre.dim.z; topim.nxyz=topim.dim.x*topim.dim.y*topim.dim.z; /* the padded grid is non-periodic */ topre.ifperiodic=topim.ifperiodic=0; /* allocate the data */ topre.dat = (double *)malloc(topre.nxyz*sizeof(double)); topim.dat = (double *)malloc(topim.nxyz*sizeof(double)); if(topre.dat==NULL || topim.dat==NULL){ printf("Unable to allcoate data in I_S2D!\n"); exit(1); } /* zero out the data */ for(i=0; iorg; /* save it for later undo */ outre->org=pad; outre->parent=&topre; outim->org=pad; outim->parent=&topim; } /* now copy the data from in->child & in->child->sibling to out zero out the current level */ for(i=0; inxyz; i++) outre->dat[i]=outim->dat[i]=0.; if(inre->child){ /* copy the child and all its siblings on the current level */ copyfromsiblings(outre, inre->child); copyfromsiblings(outim, inim->child); } /* then call the 2 level convolution between l==0: top & out l>0: out->parent & out we can actually set the parent of out to be top...then no distinction */ /* make sure there is enough workspace */ if ( lwork < (outre->dim.x+3*HFL_T) *(outre->dim.y+3*HFL_T)*(outre->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in I_S2D "); fprintf(stderr,"for data set of size (%dx%dx%d)\n", outre->dim.x, outre->dim.y, outre->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* now call the 2 level convolution */ aconvP3d( outre->parent->dat+ (outre->org.x * outre->parent->dim.y + outre->org.y) * outre->parent->dim.z + outre->org.z, outre->dat, 1, tr, tf,ts, outre->parent->dim.x, outre->parent->dim.y, outre->parent->dim.z, outre->dim.x, outre->dim.y, outre->dim.z, HFL_T, workspace1, workspace2); aconvP3d( outim->parent->dat+ (outim->org.x * outim->parent->dim.y + outim->org.y) * outim->parent->dim.z + outim->org.z, outim->dat, 1, tr, tf,ts, outim->parent->dim.x, outim->parent->dim.y, outim->parent->dim.z, outim->dim.x, outim->dim.y, outim->dim.z, HFL_T, workspace1, workspace2); if(outre->level==0){ /* restore the original state and cleanup */ outre->org=outorg; outim->org=outorg; outre->parent=outim->parent=NULL; free(topre.dat); free(topim.dat); } /* then call the children */ I_S2D(outre->child, outim->child, inre->child, inim->child, kvec, workspace1, workspace2, lwork); I_S2D(outre->sibling, outim->sibling, inre->sibling, inim->sibling, kvec, workspace1, workspace2, lwork); } /* Two level SPARSE->DENSE transformation It takes care of the periodicity on the top level outd: output - RealSpac, dense grid ins: input - Coeff Space, sparse grid Works from top to bottom This is for real grids */ void I_S2Dr(struct Grid *outd, struct Grid *ins, double *workspace1, double *workspace2, int lwork) { /* the size of the padding */ struct Ivec pad={HFL_T, HFL_T, HFL_T}; struct Ivec outorg;/* temp storage */ /* create the padded top level */ struct Grid top; int i; if(outd==NULL) return; if (outd->level==0){ /* first make the top level the same as the input top level */ top=*ins; /* now add the padding */ top.dim.x+=2*pad.x; top.dim.y+=2*pad.y; top.dim.z+=2*pad.z; top.nxyz=top.dim.x*top.dim.y*top.dim.z; /* the padded grid is non-periodic */ top.ifperiodic=0; /* allocate the data */ top.dat = (double *)malloc(top.nxyz*sizeof(double)); if(top.dat==NULL){ printf("Unable to allcoate data in I_S2D!\n"); exit(1); } /* zero out the data */ for(i=0; iorg; /* save it for later undo */ /* update parent&origin since the "top" is padded */ outd->org=pad; outd->parent=⊤ } /* zero out the current level */ for(i=0; inxyz; i++) outd->dat[i]=0.; /* now copy the data from in->child & in->child->sibling to out */ if(ins->child){ /* copy the child and all its siblings on the current level */ copyfromsiblings(outd, ins->child); } /* make sure there is enough workspace for the convolution */ if ( lwork < (outd->dim.x+3*HFL_T) *(outd->dim.y+3*HFL_T)*(outd->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in I_S2D "); fprintf(stderr,"for data set of size (%dx%dx%d)\n", outd->dim.x, outd->dim.y, outd->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* now call the 2 level convolution */ aconvP3d( outd->parent->dat+ (outd->org.x * outd->parent->dim.y + outd->org.y) * outd->parent->dim.z + outd->org.z, outd->dat, 1, tr, tf,ts, outd->parent->dim.x, outd->parent->dim.y, outd->parent->dim.z, outd->dim.x, outd->dim.y, outd->dim.z, HFL_T, workspace1, workspace2); /* if you are the top level undo all the tricks */ if(outd->level==0){ /* restore the original state and cleanup */ outd->org=outorg; outd->parent=NULL; free(top.dat); } /* then call the children */ I_S2Dr(outd->child, ins->child, workspace1, workspace2, lwork); I_S2Dr(outd->sibling, ins->sibling, workspace1, workspace2, lwork); } /* Update the parent grid to comply ti the real space convention i.e. the values of grid points on the top pof each other match */ void updateparentre(struct Grid *grid) { int i, j, k; int ii, jj, kk; if(grid->parent==NULL) return; for (i=0; idim.x; i+=2){ ii = grid->org.x + i/2; for (j=0; jdim.y; j+=2){ jj = grid->org.y + j/2; for (k=0; kdim.z; k+=2){ kk = grid->org.z + k/2; *( grid->parent->dat + (ii*grid->parent->dim.y + jj)*grid->parent->dim.z + kk ) = *( grid->dat+(i*grid->dim.y+j)*grid->dim.z+k ); } } } } /* Multilevel Idag (IN PLACE, RECURSIVE FUNCTION) self: pointer to grid to be transformed work: pointer to work space of size >= prod(dim+3*HFL) lwork: size of space pointed to by work (for checking purposes) */ void Idag(struct Grid *self,double *work,double *work_extra,int lwork) { /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (self==NULL) return; /* Idag works from the bottom up, so go to the children first; return immediately if there is nothing below. */ Idag(self->child, work, work_extra, lwork); /* Best caching in vertical segements, so do the work (HERE!) before going on to siblings */ /* For Idag "self" takes the role of the lower level. Thus Idag does no actual on the top level! */ if (self->level > 0) { /* Make sure there is enough work space for covolution */ if ( lwork < (self->dim.x+3*HFL_T)*(self->dim.y+3*HFL_T) *(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Idag for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* TRICKY: Consistency of values is a little tricky here because your children may change your values and your parent needs to know this! (This is not an issue for the standard transforms because they conform to the real-coef convention naturally.) Now, to keep the values consistent, we need to update our parent after the children have changed us. It is best to do that BEFORE the siblings, so that the work they do is (sort of) consistent with these changes. I think there is a (small) chance that this is the correct thing to do if the siblings interfere with each other. Certainly, doing this here is needed to keep consistency of the ghost values. God help us if we start threading this! *** NOTE: this and the convolution happen in opposite orders in Idag and Jdag! This is so that Idag can be the inverse of Jdag. For this to happen, Idag needs to know what Jdag "saw". */ /* Update parent before doing convolution. */ /* ipd: code it in separate function */ updateparentre(self); /* for (i=0; idim.x; i+=2) for (j=0; jdim.y; j+=2) for (k=0; kdim.z; k+=2) *( self->parent->dat + ( (self->org.x+i/2) * self->parent->dim.y + (self->org.y+j/2) ) * self->parent->dim.z + (self->org.z+k/2) ) = *( self->dat+(i*self->dim.y+j)*self->dim.z+k ); */ /* Do it! */ aPconv3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, work, work_extra ); } Idag(self->sibling, work, work_extra, lwork); return; } /* Idag on all the grids on the current level: Recursive (over cousins) not recursive anymore does the work on one grid self: the curent grid work, work_extra, lwork: workspace parameters */ void Idag_grid(struct Grid *self, double *work,double *work_extra,int lwork) { struct Grid denum; /* holds quotients for the overlapping regions */ int i; /* return immediately if there is nothing to do */ if (self==NULL) return; /* For Idag "self" takes the role of the lower level. Thus Idag does no actual on the top level! */ if (self->level > 0) { /* Make sure there is enough work space for covolution */ if ( lwork < (self->dim.x+3*HFL_T) * (self->dim.y+3*HFL_T) * (self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Idag for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* on input we must have consitency: convention is that all the overlapping regions have the same data Before calling the convolution we must ensure grid consistency: We have been changed by our children, therefore we must update the values above us, with these changes (the two levels have to obey the real space convention of ghost points) Note that the convolution will update our ghost points, but it requires input, consistent with the real space convention do the update otside - on the whole level because if its here it'll destroy any info thatmay heve been transferred by the children before us updateparentre(self); if we have regions that overlap with our siblings/cousins we must divide the data in these regions by the number of grids it is shared by, so that the cummulative convolution does the proper job setup the grid to hold the quotients */ denum = *self; /* allocate its data */ denum.dat=(double *)malloc(sizeof(double)*denum.nxyz); /* @@@ Pull this on the upper level - easier to implement */ getquotient(&denum, self); /* if(self->level==1){ fp=fopen("s1","w"); writetofile(fp,self); fclose(fp); fp=fopen("p1","w"); writetofile(fp,self->parent); fclose(fp); } scanf("%d", &i); */ /* THIS IS SCARY: think of an alternative */ for(i=0; inxyz; i++) self->dat[i]/=denum.dat[i]; /* Perform the two level convolution (cummulative) */ aPconv3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, 1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, work, work_extra ); /* if(self->level==1){ fp=fopen("s2","w"); writetofile(fp,self); fclose(fp); }*/ /* if(self->level==1){ fp=fopen("p2","w"); writetofile(fp,self->parent); fclose(fp); } */ /* scanf("%d", &i); */ /* restore the quotients: only ourselves */ for(i=0; inxyz; i++) self->dat[i]*= denum.dat[i]; /* if(self->level==1){ fp=fopen("p3","w"); writetofile(fp,self->parent); fclose(fp); } */ /* scanf("%d",&i); */ free(denum.dat); /* now we must go to the cousin (or sibling) Idag_level(self->sibling, work, work_extra, lwork); Idag_level(self->cousin, work, work_extra, lwork); */ } } struct Grid * mkgridfromgrid_level(struct Grid*); void update_parent_level(struct Grid* self) { struct Grid *next=self; while(next){ updateparentre(next); next=next->cousin; } } void copygrid_level(struct Grid *out, struct Grid *in); /* add all the overlapping regions on all the grids of a given level */ void addon_overlap_regions_level(struct Grid* self, struct Grid* diff); /* ipd: This Idag should work with our new overlapping grids Multilevel Idag (IN PLACE, RECURSIVE FUNCTION) self: pointer to grid to be transformed work: pointer to work space of size >= prod(dim+3*HFL) lwork: size of space pointed to by work (for checking purposes) */ void Idag_new(struct Grid *self,double *work,double *work_extra,int lwork) { /* copy of the parrent level (use it for the overlaping regions) */ struct Grid *pdiff; struct Grid *next;/* loop index */ /* return immediately if there is nothing to do */ if (self==NULL) return; /* Idag works from the bottom up, so go to the children first; return immediately if there is nothing below. */ Idag_new(self->child, work, work_extra, lwork); /* update the ghosts on the level above, because this level has been changed from below, and the realspace convention is not maintained anymore */ update_parent_level(self); /* now before we start accumulating on the level above us we need to make a copy of that level, so that later on we can take care of the overlapping regions later. Call the copy diff. */ pdiff=mkgridfromgrid_level(self->parent); /* copy the data */ copygrid_level(pdiff, self->parent); /* now we do all the work on the current level (with quotients) This routine should ensure that the current level is updated properly and the parents get all the contribution from their children. if two grids on the current level overlap, the quotient mechanism ensures that the iformation from these regions is passed up in total once. If several overlapping children have the same parent, there is nothing more to do, but if they have different parents. we must ensure that the information everybody passes up is propagated to all the parents (see below) */ next=self; while (next){ /* printf("LEVEL:%d grid:%d ", next->level, next); */ Idag_grid(next, work, work_extra, lwork); next=next->cousin; /* printf("cousin: %d\n", next); */ } /* after Idag_level is done, the overlapping regions on the upper level still do not have all the information. This is because a sometimes information from lower level can flow to upper level, which is not its immetiate parent. The quotients mechanism ensures that the transferred up data is not overcounted. Still it has to be propagated among the overlapping regions on the upper level. We do it in this way: Before transferring data up, on the uper level we have two overlapping grids: a, b. After the data transfer the grids become a', b'. Then on the overlapping regions we perform a(final) = a + (b'-b); b(final) = b + (b'-b); Technically we saved a copy of the whole level, in diff, so now we can get the change on the whole level: */ /* diff = self->parent - diff // loop over cousins */ if(pdiff){ gridsub_level(pdiff, self->parent, pdiff); } /* now propagate the changes in the overlapping regions to everybody again loop over cousins */ addon_overlap_regions_level(self->parent, pdiff); killgrid_level(pdiff); } /* copies padded grid on nonpadded interior and takes care of the proper folding of the pads according to Bloch's Thm inre/inim: input grid (padded, so it's wider) outre/outim: output grid (trimmed) */ void CopyfromBlochPad(struct Grid *outre, struct Grid *outim, struct Grid *inre, struct Grid *inim, struct Dvec kvec, struct Ivec pad) { /* here we can have some checks for structure consistency outre==outim, outdim=indim+2pad */ int i, j, k; /* input grid indices */ int ii, jj, kk; /* output grid indices */ struct Ivec outdim=outre->dim; struct Ivec indim=inre->dim; int inpos, outpos;/* position of the data in the in/out grids */ double cosx, cosy, cosz, sinx, siny, sinz; double freal=1.0,fimag=0.0; /* phase factors when we copy the data */ double tmpre, tmpim; /* tmp variables to do COMPLEX*COMPLEX */ /* We used to have kvec in physical coordinates, not it's in lattice cosx = cos(-outdim.x*outre->scale.x * kvec.x); sinx = sin(-outdim.x*outre->scale.x * kvec.x); cosy = cos(-outdim.y*outre->scale.y * kvec.y); siny = sin(outdim.y*outre->scale.y * kvec.y); cosz = cos(-outdim.z*outre->scale.z * kvec.z); sinz = sin(-outdim.z*outre->scale.z * kvec.z); */ cosx = cos(- 2 * M_PI * kvec.x); sinx = sin(- 2 * M_PI * kvec.x); cosy = cos(- 2 * M_PI * kvec.y); siny = sin(- 2 * M_PI * kvec.y); cosz = cos(- 2 * M_PI * kvec.z); sinz = sin(- 2 * M_PI * kvec.z); /* may be we should take care of zero-ing the output at the beginning, so that we can use += to copy the data */ for(i=0; inxyz; i++) outre->dat[i]=outim->dat[i]=0.; /* now copy the data from "in" to "out", by makng sure the bloch's phase is accounted when in the padded region */ for (i=0; i=pad.x && i=pad.y && j=pad.z && kdat + outpos ) += *( inre->dat + inpos); *( outim->dat + outpos ) += *( inim->dat + inpos); } else { /* copy with a phase factor, this is the next cell */ freal=1.0; fimag=0.0; /* now determine the phase factors */ if(i=outdim.x - pad.x) { /* we are in the next cell: f*=e^{i*k*x} */ tmpre = cosx*freal - sinx*fimag; tmpim = cosx*fimag + sinx*freal; freal=tmpre; fimag=tmpim; } if(j=outdim.y - pad.y) { /* we are in the next cell: f*=e^{i*k*y} */ tmpre = cosy*freal - siny*fimag; tmpim = cosy*fimag + siny*freal; freal=tmpre; fimag=tmpim; } if(k=outdim.z - pad.z) { /* we are in the next cell: f*=e^{i*k*z} */ tmpre = cosz*freal - sinz*fimag; tmpim = cosz*fimag + sinz*freal; freal=tmpre; fimag=tmpim; } /* copy the data with the appropriate phase factors */ *( outre->dat + outpos) += freal*inre->dat[inpos] - fimag*inim->dat[inpos]; *( outim->dat+ outpos) += freal*inim->dat[inpos] + fimag*inre->dat[inpos]; } } } /* copies "in" onto "out" and all of its siblings. "in" is twice denser grid than the parent of "out", i.e. it's scale is the same as "out" but "out"'s origin is displaced from the origin of "in" */ void copytosiblings(struct Grid *out, struct Grid *in) { int i, j, k, ii, jj, kk; if(out==NULL) return; for(i=0; idim.x; i++){ ii = i + 2*out->org.x; for(j=0; jdim.y; j++){ jj = j + 2*out->org.y; for(k=0; kdim.z; k++){ kk = k + 2*out->org.z; *(out->dat+ (i*out->dim.y+j)*out->dim.z +k)= *(in->dat+ (ii*in->dim.y+jj)*in->dim.z +kk); } } } /* now go to the next sibling */ copytosiblings(out->sibling, in); } /* Idag from dense to sparse grid both dense and sparse grids are on the same level We first do the transfrom on the dense grid, as usual and then copy the data onto the sparse grid (actually on sparse grid's child and child's siblintgs) The top level is TRICKY: we must create top level of the dense grid, which has padding, so that we can perform two level transform between it and the original dense top level. After this we copy the new padded top level onto the sparse top level. (we can't have the transform between the original dense top level and the sparse top level, because the later does not have padding). At the end we must fold the padding back to the interior, but taking into account the appropriate Bloch phases */ void Idag_D2S(struct Grid *sparsere, struct Grid *sparseim, struct Grid *densere, struct Grid *denseim , struct Dvec kvec, double *workspace1,double *workspace2,int lwork) { /* We have to do the reverse of I_S2D Idag works from bottom up, so 1.) go to the bottom do 2level convolutions of dense L<->(L-1) 2.) copy dense L onto sparse L+1 and its children 3.) when you reach the level zero must have top level w/ padding (it has the resolution of the sparse toplevel when we are dont with that one, copy the interior onto sparsetop */ struct Ivec pad={HFL_T, HFL_T, HFL_T}; /* struct Ivec pad={5, 5, 5}; */ struct Ivec tmporg;/* temp storage */ /* create the expanded top levels (for now the same as the output grid) */ struct Grid topre; struct Grid topim; double *work = workspace1; double *work_extra = workspace2; int i; /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (densere==NULL) return; /* Idag works from the bottom up, so go to the children first*/ Idag_D2S(sparsere->child, sparseim->child, densere->child, denseim->child, kvec, workspace1, workspace2, lwork); /* Prepare the top level: */ if(densere->level==0){ /* first make the top level as the input top level */ topre=*sparsere; topim=*sparseim; /* now add the padding */ topre.dim.x+=2*pad.x; topre.dim.y+=2*pad.y; topre.dim.z+=2*pad.z; topim.dim.x+=2*pad.x; topim.dim.y+=2*pad.y; topim.dim.z+=2*pad.z; topre.nxyz=topre.dim.x*topre.dim.y*topre.dim.z; topim.nxyz=topim.dim.x*topim.dim.y*topim.dim.z; /* the padded grid is non-periodic -hmmm it should be, actually */ topre.ifperiodic=topim.ifperiodic=0; /* allocate the data */ topre.dat = (double *)malloc(topre.nxyz*sizeof(double)); topim.dat = (double *)malloc(topim.nxyz*sizeof(double)); if(topre.dat==NULL || topim.dat==NULL){ printf("Unable to allcoate data in I_S2D!\n"); exit(1); } /* zero out the data */ for(i=0; iorg; /* save it for later undo */ densere->org=pad; densere->parent=&topre; denseim->org=pad; denseim->parent=&topim; } /* Best caching in vertical segements, so do the work (HERE!) before going on to siblings */ /* Make sure there is enough work space for covolution */ if ( lwork < (densere->dim.x+3*HFL_T) *(densere->dim.y+3*HFL_T)*(densere->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Idag for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", densere->dim.x,densere->dim.y,densere->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* TRICKY: Consistency of values is a little tricky here because your children may change your values and your parent needs to know this! (This is not an issue for the standard transforms because they conform to the real-coef convention naturally.) Now, to keep the values consistent, we need to update our parent after the children have changed us. It is best to do that BEFORE the siblings, so that the work they do is (sort of) consistent with these changes. I think there is a (small) chance that this is the correct thing to do if the siblings interfere with each other. Certainly, doing this here is needed to keep consistency of the ghost values. God help us if we start threading this! *** NOTE: this and the convolution happen in opposite orders in Idag and Jdag! This is so that Idag can be the inverse of Jdag. For this to happen, Idag needs to know what Jdag "saw". */ /* Update parent before doing convolution. */ updateparentre(densere); updateparentre(denseim); /* Do it! */ aPconv3d( densere->parent->dat + (densere->org.x*densere->parent->dim.y + densere->org.y) * densere->parent->dim.z + densere->org.z, densere->dat, 1, tr, tf, ts, densere->parent->dim.x, densere->parent->dim.y, densere->parent->dim.z, densere->dim.x, densere->dim.y, densere->dim.z, HFL_T, work, work_extra ); aPconv3d( denseim->parent->dat + (denseim->org.x*denseim->parent->dim.y + denseim->org.y) * denseim->parent->dim.z + denseim->org.z, denseim->dat, 1, tr, tf, ts, denseim->parent->dim.x, denseim->parent->dim.y, denseim->parent->dim.z, denseim->dim.x, denseim->dim.y, denseim->dim.z, HFL_T, work, work_extra ); /* Now copy the data to the sparse grid */ if(sparsere->child){ copytosiblings(sparsere->child, densere); copytosiblings(sparseim->child, denseim); } /* go to the siblings */ /* take care of the top level */ if (densere->level==0){ /* restore the original state of dense top level */ CopyfromBlochPad(sparsere, sparseim, &topre, &topim, kvec, pad); densere->parent=denseim->parent=NULL; densere->org=tmporg; denseim->org=tmporg; free(topre.dat); free(topim.dat); } Idag_D2S(sparsere->sibling, sparseim->sibling, densere->sibling, denseim->sibling, kvec, workspace1, workspace2, lwork); return; } /* this one is for real grids outs: output grid - sparse, Coeffspace ind: input grid - dense, Realspace */ void Idag_D2Sr(struct Grid *outs, struct Grid *ind, double *workspace1, double *workspace2,int lwork) { /* We have to do the reverse of I_S2D Idag works from bottom up, so 1.) go to the bottom do 2level convolutions of dense L<->(L-1) 2.) copy dense L onto sparse L+1 and its children 3.) when you reach the level zero must have top level w/ padding (it has the resolution of the sparse toplevel when we are dont with that one, copy the interior onto sparsetop */ struct Ivec pad={HFL_T, HFL_T, HFL_T}; /* struct Ivec pad={5, 5, 5}; */ struct Ivec tmporg;/* temp storage */ /* create the expanded top levels (for now the same as the output grid) */ struct Grid top; double *work = workspace1; double *work_extra = workspace2; int i; /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (ind==NULL) return; /* Idag works from the bottom up, so go to the children first*/ Idag_D2Sr(outs->child, ind->child, workspace1, workspace2, lwork); /* Prepare the top level: */ if(ind->level==0){ /* first make the top level as the input top level */ top=*outs; /* now add the padding */ top.dim.x+=2*pad.x; top.dim.y+=2*pad.y; top.dim.z+=2*pad.z; top.nxyz=top.dim.x*top.dim.y*top.dim.z; /* the padded grid is non-periodic -hmmm it should be, actually */ top.ifperiodic=0; /* allocate the data */ top.dat = (double *)malloc(top.nxyz*sizeof(double)); if(top.dat==NULL){ printf("Unable to allcoate data in I_S2Dr!\n"); exit(1); } /* zero out the data - will be updated later */ for(i=0; iorg; /* save it for later undo */ ind->org=pad; ind->parent=⊤ } /* Best caching in vertical segements, so do the work (HERE!) before going on to siblings */ /* Make sure there is enough work space for covolution */ if ( lwork < (ind->dim.x+3*HFL_T) *(ind->dim.y+3*HFL_T)*(ind->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Idag for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", ind->dim.x,ind->dim.y,ind->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* TRICKY: Consistency of values is a little tricky here because your children may change your values and your parent needs to know this! (This is not an issue for the standard transforms because they conform to the real-coef convention naturally.) Now, to keep the values consistent, we need to update our parent after the children have changed us. It is best to do that BEFORE the siblings, so that the work they do is (sort of) consistent with these changes. I think there is a (small) chance that this is the correct thing to do if the siblings interfere with each other. Certainly, doing this here is needed to keep consistency of the ghost values. God help us if we start threading this! *** NOTE: this and the convolution happen in opposite orders in Idag and Jdag! This is so that Idag can be the inverse of Jdag. For this to happen, Idag needs to know what Jdag "saw". */ /* Update parent before doing convolution. */ updateparentre(ind); /* Do it! */ aPconv3d( ind->parent->dat + (ind->org.x*ind->parent->dim.y + ind->org.y) * ind->parent->dim.z + ind->org.z, ind->dat, 1, tr, tf, ts, ind->parent->dim.x, ind->parent->dim.y, ind->parent->dim.z, ind->dim.x, ind->dim.y, ind->dim.z, HFL_T, work, work_extra ); /* Now copy the data to the sparse grid */ copytosiblings(outs->child, ind); /* go to the siblings */ /* take care of the top level */ if (ind->level==0){ /* restore the original state of dense top level */ CopyfromPadded_p(outs, &top, pad); ind->parent=NULL; ind->org=tmporg; free(top.dat); } Idag_D2Sr(outs->sibling, ind->sibling, workspace1, workspace2, lwork); return; } /* Multilevel Jdag (IN PLACE, RECURSIVE FUNCTION) self: pointer to grid to be transformed work: pointer to work space of size >= prod(dim+3*HFL) lwork: size of space pointed to by work (for checking purposes) */ void Jdag(struct Grid *self,double *work,double *work_extra,int lwork) { int i,j,k; /* Return immediately if there is nothing to do */ /* No excutables BEFORE this statement, please! */ if (self==NULL) return; /* Jdag works from the top down, so do work now, before the children */ /* For Jdag "self" takes the role of the lower level. Thus Jdag does no actual work on the top level! */ if (self->level > 0) { /* Make sure there is enough work space */ if ( lwork < (self->dim.x+3*HFL_T) *(self->dim.y+3*HFL_T)*(self->dim.z+3*HFL_T) ) { fprintf(stderr,"Insufficient work space in Jdag for "); fprintf(stderr,"data set of size (%dx%dx%d)\n", self->dim.x,self->dim.y,self->dim.z); fprintf(stderr,"Exiting.\n"); exit(1); } /* Do it! */ aPconv3d( self->parent->dat+ (self->org.x * self->parent->dim.y + self->org.y) * self->parent->dim.z + self->org.z, self->dat, -1, tr, tf, ts, self->parent->dim.x, self->parent->dim.y, self->parent->dim.z, self->dim.x, self->dim.y, self->dim.z, HFL_T, work, work_extra ); } /* Print statement(s) HERE AS WORK IS DONE for debugging */ /* { */ /* int level; */ /* printf("Jdag: "); */ /* for (level=0; level< self->level; level++) */ /* printf(" "); */ /* printf("[%d]\n",self->level); */ /* } */ /* Best caching in vertical segements, so do children FIRST, and THEN sibblings */ Jdag(self->child, work,work_extra, lwork); /* TRICKY: Consistency of values is a little tricky here because your children may change your values and your parent needs to know this! (This is not an issue for the standard transforms because they conform to the real-coef convention naturally.) Now, to keep the values consistent, we need to update our parent after the children have changed us. It is best to do that BEFORE the siblings, so that the work they do is (sort of) consistent with these changes. I think there is a (small) chance that this is the correct thing to do if the siblings interfere with each other. Certainly, doing this here is needed to keep consistency of the ghost values. God help us if we start threading this! */ /* Copy ghost values to update parent (if there is one) */ if (self->level>0) for (i=0; idim.x; i+=2) for (j=0; jdim.y; j+=2) for (k=0; kdim.z; k+=2) *( self->parent->dat + ( (self->org.x+i/2) * self->parent->dim.y + (self->org.y+j/2) ) * self->parent->dim.z + (self->org.z+k/2) ) = *( self->dat+(i*self->dim.y+j)*self->dim.z+k ); /* Ok, now do siblings */ Jdag(self->sibling, work, work_extra, lwork); return; }