/* DFT++ is a density functional package developed by the research group of Professor Tomas Arias Copyright 1996-2003 Sohrab Ismail-Beigi This file is part of DFT++. DFT++ is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. DFT++ is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DFT++; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please see the file CREDITS for a list of authors. For academic users, we request that publications using results obtained with this software reference "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000). and, if using the wavelet basis, further reference "Multiresolution analysis of electronic structure: semicardinal and wavelet bases," T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999). and "Robust ab initio calculation of condensed matter: transparent convergence through semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003). For your convenience, preprints of the above articles may be obtained from http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively. */ #include "header.h" // The maximum number of digits to which we can converge invL. #define MAX_CONVDIGITS 11 void CopyBlochPad(struct Grid *outre, struct Grid *outim, struct Grid *inre, struct Grid *inim, struct Dvec kvec, struct Ivec pad); void CopyfromPadded(struct Grid *output, struct Grid *input, struct Ivec pad); void apply_L_ORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output) { #ifdef DFT_PROFILING timerOn(31); // turn on L timer counterIncr(23); #endif // DFT_PROFILING if(input.representation == REALSPACE) die("Trying to take L of realspace bundle.\n"); // Set the representation and quantum numbers of the output. output.representation=input.representation; if(output.embedding!=input.embedding) die("Cannot do apply_L(%s, %s)! Write me!\n", input.getembedding(), output.getembedding()); //get the gridspecs of the input (shouild be the same as output) Gridspec *gridspec; switch(input.embedding){ case SPARSE: gridspec = input.basis_spec->gridspec; break; case DENSE: gridspec = input.basis_spec->gridspec_dense; break; default: die("apply_L: Unknown input embedding %d\n",input.embedding); } // output.kvec=input.kvec; double kx, ky, kz; if(input.basis->qnum){ // hack it chargedensities don't have qnum kx=input.basis->qnum->kvec.v[0]; ky=input.basis->qnum->kvec.v[1]; kz=input.basis->qnum->kvec.v[2]; } else kx=ky=kz=0.; //dft_log("USING kpt: %lf %lf %lf\n",kx, ky, kz); // Declare workspace variables for operators. double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)]; double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)]; real *workspace1, *workspace2; // Allocate workspaces workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace1", "apply_L"); workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace2", "apply_L"); if (kx==0. && ky==0. && kz==0.) { // no need of bloch phase // Do the real part. Grid *tempin = mkgridnew(gridspec,NULL); Grid *tempout = mkgridnew(gridspec,NULL); Grid *helpgrid = mkgridnew(gridspec,NULL); GetRe(tempin,input); zeroghostrec(tempin); L(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); PutRe(tempout,output); // Now the imaginary part/ GetIm(tempin,input); zeroghostrec(tempin); L(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); PutIm(tempout,output); // Free the memory allocated to the grids. killgridnew(tempin); killgridnew(tempout); killgridnew(helpgrid); } else { // Here, we need two helper grids, since CopyBlochPad needs to deal with // real and imaginary parts simultaneously. Grid *input_re, *input_im, *output_re, *output_im; input_re = mkgridnew(gridspec,NULL); input_im = mkgridnew(gridspec,NULL); output_re = mkgridnew(gridspec,NULL); output_im = mkgridnew(gridspec,NULL); GetRe(input_re,input); GetIm(input_im,input); // create expanded top level to hold the top level with // proper wrapping when going to the neighbouring cells struct Grid FgridinRe, FgridoutRe,FgridinIm, FgridoutIm; struct Ivec xpnd = {HFL_L1, HFL_L1, HFL_L1}; struct Ivec rvrs = {-HFL_L1, -HFL_L1, -HFL_L1}; FgridinRe.level = 0; // since Fgrid has the pieces from the neighbouring cells we // do nonperiodic calculation FgridinRe.ifperiodic=non_periodic; FgridinRe.org.x = input_re->org.x - xpnd.x; FgridinRe.org.y = input_re->org.y - xpnd.y; FgridinRe.org.z = input_re->org.z - xpnd.z; FgridinRe.dim.x = input_re->dim.x + 2*xpnd.x; FgridinRe.dim.y = input_re->dim.y + 2*xpnd.y; FgridinRe.dim.z = input_re->dim.z + 2*xpnd.z; FgridinRe.scale = input_re->scale; FgridinRe.parent = NULL; // it is the top level FgridinRe.nxyz = FgridinRe.dim.x*FgridinRe.dim.y*FgridinRe.dim.z; FgridinRe.nxyzwholegrid = -100000;// meaningles, not bgrid // do all of the above for FgridinIm,FgridoutRe, FgridoutIm FgridinIm=FgridinRe; FgridoutRe=FgridinRe; FgridoutIm=FgridinRe; // allocate the space for the data FgridinRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridinRe", "apply_L"); FgridinIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridinIm", "apply_L"); FgridoutRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridoutRe", "apply_L"); FgridoutIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridoutIm", "apply_L"); Grid *helpgrid = mkgridnew(gridspec,NULL); // in order that Labovenew works correctly helpgrid and ingrid have // to be identical structures on levels below toplevel // we must change the origin of helpgrid as well if(helpgrid->child) setnewparent(helpgrid->child,xpnd,helpgrid); // input.Re.Cgrid[ig]->child child of FgridinRe FgridinRe.child = input_re->child; FgridinRe.sibling = input_re->sibling; // the same for FgridinIm,FgridoutRe, FgridoutIm FgridinIm.child = input_im->child; FgridinIm.sibling = input_im->sibling; FgridoutRe.child = output_re->child; FgridoutRe.sibling = output_re->sibling; FgridoutIm.child = output_im->child; FgridoutIm.sibling = output_im->sibling; // make Fgrid parent of Xput.Cgrid[ig]->child and it siblings if(input_re->child) { setnewparent(input_re->child,xpnd,&FgridinRe); setnewparent(input_im->child,xpnd,&FgridinIm); setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } // Copy the data to the padded grid w/ the appropriate // boundary conditions // The convention in the wavelet code is that the k-points should be // in reciprocal coordinates. Here, however, the k-points are in // fractional coordinates. CopyBlochPad converts them. Dvec oldk; oldk.x = kx; oldk.y = ky; oldk.z = kz; CopyBlochPad(&FgridinRe, &FgridinIm, input_re, input_im, oldk, xpnd); zeroghostrec(&FgridinRe); zeroghostrec(&FgridinIm); L(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2); L(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2); //zeroghostrec(out) is done inside L() // copy the data from FgridoutRe/Im top level back to // output.Re/Im.Cgrid[ig] CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); // return the original parents of input.Re.Cgrid[ig] and // output.Re.Cgrid[i]; if(input_re->child) { setnewparent(input_re->child,rvrs,input_re); setnewparent(input_im->child,rvrs,input_im); setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Put the gridclasses back. PutRe(output_re,output); PutIm(output_im,output); // free the memory of the fake top levels myfree(FgridinRe.dat); myfree(FgridinIm.dat); myfree(FgridoutRe.dat); myfree(FgridoutIm.dat); // restore the origin of helpgrid as it was initially setnewparent(helpgrid->child,rvrs,helpgrid); // Free the Grid's. killgridnew(input_re); killgridnew(input_im); killgridnew(output_re); killgridnew(output_im); killgridnew(helpgrid); } myfree(workspace1); myfree(workspace2); #ifdef DFT_PROFILING timerOff(31); // turn off L timer #endif // DFT_PROFILING } // Works with non-orthorhombic cells! -- mhe 10/23/02 void apply_L_NORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output) { #ifdef DFT_PROFILING timerOn(31); // turn on L timer counterIncr(23); #endif // DFT_PROFILING Lattice *lattice=input.basis_spec->lattice; if(input.representation == REALSPACE) die("Trying to take L of realspace bundle.\n"); // Set the representation and quantum numbers of the output. output.representation=input.representation; if(output.embedding!=input.embedding) die("Cannot do apply_L(%s, %s)! Write me!\n", input.getembedding(), output.getembedding()); //get the gridspecs of the input (shouild be the same as output) Gridspec *gridspec; switch(input.embedding){ case SPARSE: gridspec = input.basis_spec->gridspec; break; case DENSE: gridspec = input.basis_spec->gridspec_dense; break; default: die("apply_L: Unknown input embedding %d\n",input.embedding); } // output.kvec=input.kvec; double kx, ky, kz; if(input.basis->qnum){ // hack it chargedensities don't have qnum kx=input.basis->qnum->kvec.v[0]; ky=input.basis->qnum->kvec.v[1]; kz=input.basis->qnum->kvec.v[2]; } else kx=ky=kz=0.; //dft_log("USING kpt: %lf %lf %lf\n",kx, ky, kz); // Get the grid dimensions. int N1=gridspec[0].dim.x, N2=gridspec[0].dim.y, N3=gridspec[0].dim.z, Ntot=N1*N2*N3; double volfactor=lattice->unit_cell_volume/((double)Ntot*4*M_PI*M_PI); // Declare workspace variables for operators. double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)]; double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)]; real *workspace1, *workspace2; // Allocate workspaces workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace1", "apply_L"); workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace2", "apply_L"); if (kx==0. && ky==0. && kz==0.) { // no need of bloch phase // Do the real part. Grid *tempin = mkgridnew(gridspec,NULL); Grid *tempout = mkgridnew(gridspec,NULL); Grid *helpgrid = mkgridnew(gridspec,NULL); // This grid holds the sum of the various components of L. Grid *sum = mkgridnew(gridspec,NULL); evalgrid(sum,NULLDVEC,zero); GetRe(tempin,input); zeroghostrec(tempin); // Do dx*dx. if(lattice->GGT.m[0][0] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,0); scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)), tempout); } // Do dx*dy. if(lattice->GGT.m[0][1] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,1); scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)), tempout); } // Do dx*dz. if(lattice->GGT.m[0][2] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,2); scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)), tempout); } // Do dy*dy. if(lattice->GGT.m[1][1] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,1); scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)), tempout); } // Do dy*dz. if(lattice->GGT.m[1][2] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,2); scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)), tempout); } // Do dz*dz. if(lattice->GGT.m[2][2] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,2,2); scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)), tempout); } // Finish up. PutRe(sum,output); // Now the imaginary part evalgrid(sum,NULLDVEC,zero); GetIm(tempin,input); zeroghostrec(tempin); // Do dx*dx. if(lattice->GGT.m[0][0] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,0); scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)), tempout); } // Do dx*dy. if(lattice->GGT.m[0][1] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,1); scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)), tempout); } // Do dx*dz. if(lattice->GGT.m[0][2] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,2); scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)), tempout); } // Do dy*dy. if(lattice->GGT.m[1][1] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,1); scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)), tempout); } // Do dy*dz. if(lattice->GGT.m[1][2] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,2); scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)), tempout); } // Do dz*dz. if(lattice->GGT.m[2][2] != 0.0) { cD(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,2,2); scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)), tempout); } // Finish up. PutIm(sum,output); // Free the memory allocated to the grids. killgridnew(tempin); killgridnew(tempout); killgridnew(helpgrid); killgridnew(sum); } else { // Here, we need two helper grids, since CopyBlochPad needs to deal with // real and imaginary parts simultaneously. Grid *input_re, *input_im, *output_re, *output_im, *sum_re, *sum_im; input_re = mkgridnew(gridspec,NULL); input_im = mkgridnew(gridspec,NULL); output_re = mkgridnew(gridspec,NULL); output_im = mkgridnew(gridspec,NULL); sum_re = mkgridnew(gridspec,NULL); sum_im = mkgridnew(gridspec,NULL); evalgrid(sum_re,NULLDVEC,zero); evalgrid(sum_im,NULLDVEC,zero); GetRe(input_re,input); GetIm(input_im,input); // create expanded top level to hold the top level with // proper wrapping when going to the neighbouring cells struct Grid FgridinRe, FgridoutRe,FgridinIm, FgridoutIm; struct Ivec xpnd = {HFL_L1, HFL_L1, HFL_L1}; struct Ivec rvrs = {-HFL_L1, -HFL_L1, -HFL_L1}; FgridinRe.level = 0; // since Fgrid has the pieces from the neighbouring cells we // do nonperiodic calculation FgridinRe.ifperiodic=non_periodic; FgridinRe.org.x = input_re->org.x - xpnd.x; FgridinRe.org.y = input_re->org.y - xpnd.y; FgridinRe.org.z = input_re->org.z - xpnd.z; FgridinRe.dim.x = input_re->dim.x + 2*xpnd.x; FgridinRe.dim.y = input_re->dim.y + 2*xpnd.y; FgridinRe.dim.z = input_re->dim.z + 2*xpnd.z; FgridinRe.scale = input_re->scale; FgridinRe.parent = NULL; // it is the top level FgridinRe.nxyz = FgridinRe.dim.x*FgridinRe.dim.y*FgridinRe.dim.z; FgridinRe.nxyzwholegrid = -100000;// meaningles, not bgrid // do all of the above for FgridinIm,FgridoutRe, FgridoutIm FgridinIm=FgridinRe; FgridoutRe=FgridinRe; FgridoutIm=FgridinRe; // allocate the space for the data FgridinRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridinRe", "apply_L"); FgridinIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridinIm", "apply_L"); FgridoutRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridoutRe", "apply_L"); FgridoutIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridoutIm", "apply_L"); Grid *helpgrid = mkgridnew(gridspec,NULL); // in order that Labovenew works correctly helpgrid and ingrid have // to be identical structures on levels below toplevel // we must change the origin of helpgrid as well if(helpgrid->child) setnewparent(helpgrid->child,xpnd,helpgrid); // input.Re.Cgrid[ig]->child child of FgridinRe FgridinRe.child = input_re->child; FgridinRe.sibling = input_re->sibling; // the same for FgridinIm,FgridoutRe, FgridoutIm FgridinIm.child = input_im->child; FgridinIm.sibling = input_im->sibling; FgridoutRe.child = output_re->child; FgridoutRe.sibling = output_re->sibling; FgridoutIm.child = output_im->child; FgridoutIm.sibling = output_im->sibling; // make Fgrid parent of Xput.Cgrid[ig]->child and it siblings if(input_re->child) { setnewparent(input_re->child,xpnd,&FgridinRe); setnewparent(input_im->child,xpnd,&FgridinIm); } // Copy the data to the padded grid w/ the appropriate // boundary conditions // The convention in the wavelet code is that the k-points should be // in reciprocal coordinates. Here, however, the k-points are in // fractional coordinates. CopyBlochPad converts them. Dvec oldk; oldk.x = kx; oldk.y = ky; oldk.z = kz; CopyBlochPad(&FgridinRe, &FgridinIm, input_re, input_im, oldk, xpnd); zeroghostrec(&FgridinRe); zeroghostrec(&FgridinIm); // Do dx*dx. if(lattice->GGT.m[0][0] != 0.0) { if(output_re->child) { setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } cD(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,0); cD(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,0); CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); if(output_re->child) { setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Add to sum. scalarmultcumnew(sum_re,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)), output_re); scalarmultcumnew(sum_im,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)), output_im); } // Do dx*dy. if(lattice->GGT.m[0][1] != 0.0) { if(output_re->child) { setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } cD(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,1); cD(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,1); CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); if(output_re->child) { setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Add to sum. scalarmultcumnew(sum_re,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)), output_re); scalarmultcumnew(sum_im,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)), output_im); } // Do dx*dz. if(lattice->GGT.m[0][2] != 0.0) { if(output_re->child) { setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } cD(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,2); cD(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,2); CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); if(output_re->child) { setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Add to sum. scalarmultcumnew(sum_re,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)), output_re); scalarmultcumnew(sum_im,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)), output_im); } // Do dy*dy. if(lattice->GGT.m[1][1] != 0.0) { if(output_re->child) { setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } cD(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,1,1); cD(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,1,1); CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); if(output_re->child) { setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Add to sum. scalarmultcumnew(sum_re,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)), output_re); scalarmultcumnew(sum_im,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)), output_im); } // Do dy*dz. if(lattice->GGT.m[1][2] != 0.0) { if(output_re->child) { setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } cD(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,1,2); cD(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,1,2); CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); if(output_re->child) { setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Add to sum. scalarmultcumnew(sum_re,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)), output_re); scalarmultcumnew(sum_im,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)), output_im); } // Do dz*dz. if(lattice->GGT.m[2][2] != 0.0) { if(output_re->child) { setnewparent(output_re->child,xpnd,&FgridoutRe); setnewparent(output_im->child,xpnd,&FgridoutIm); } cD(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,2,2); cD(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,2,2); CopyfromPadded(output_re, &FgridoutRe, xpnd); CopyfromPadded(output_im, &FgridoutIm, xpnd); if(output_re->child) { setnewparent(output_re->child,rvrs,output_re); setnewparent(output_im->child,rvrs,output_im); } // Add to sum. scalarmultcumnew(sum_re,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)), output_re); scalarmultcumnew(sum_im,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)), output_im); } if(input_re->child) { setnewparent(input_re->child,rvrs,input_re); setnewparent(input_im->child,rvrs,input_im); } // Put the gridclasses back. PutRe(sum_re,output); PutIm(sum_im,output); // free the memory of the fake top levels myfree(FgridinRe.dat); myfree(FgridinIm.dat); myfree(FgridoutRe.dat); myfree(FgridoutIm.dat); // restore the origin of helpgrid as it was initially setnewparent(helpgrid->child,rvrs,helpgrid); // Free the Grid's. killgridnew(input_re); killgridnew(input_im); killgridnew(output_re); killgridnew(output_im); killgridnew(sum_re); killgridnew(sum_im); killgridnew(helpgrid); } myfree(workspace1); myfree(workspace2); #ifdef DFT_PROFILING timerOff(31); // turn off L timer #endif // DFT_PROFILING } //ipd: this is a hack to matt's nonorthorhombic laplacian so that we can //use different code for the orthorhombic cells void apply_L(const WL_ComplexColumn &input, WL_ComplexColumn &output) { matrix3 l=input.basis_spec->lattice->GGT; if((l.m[0][1] == 0.0) && (l.m[0][2] == 0.0) && (l.m[1][2] == 0.0)\ && (input.basis_spec->use_L_orthorhombic == TRUE)) apply_L_ORTHORHOMBIC(input,output); else apply_L_NORTHORHOMBIC(input,output); } // The invL operator ( p = invL(q) ) is obtained via a variational solution of: // L(p) = q, minimizing F[p] = (1/2)*p^L(p) - p^q // note: q = input, p = output // We assume that the input column has zero total integral! (constants are // in the null space of L) // This is the old version, whose algorithm doesn't exploit linearity, and // which uses complex operators and temporaries. void apply_invL_OLD(const WL_ComplexColumn &input, WL_ComplexColumn &output) { #ifdef DFT_PROFILING timerOn(32); // turn on invL timer #endif // DFT_PROFILING // We have to figure out how to pass this into invL!!! int digits, convdigits, iter = 0, anotherstep = 1; // If we do more iterations than dieiter, we should quit. const int dieiter = 200; double F, a, b, lambda, gradmag, pgradmag; WL_ComplexColumn grad(input), pgrad(input), temp(input), d(input), pd(input); #ifdef DFT_PROFILING // Declare timeval's so that we can time each iteration. timeval time1,time2; double rtimepoi,rtimepoicum=0; #endif // DFT_PROFILING // Start with a zero initial solution. output.zero_out(); // HACK ALERT!!! // Calculate the number of convergence digits using global_abs2Yg. convdigits = 4-(int) floor(log((1.e-100+sqrt(global_abs2Yg)))/log(10.)); if(convdigits > MAX_CONVDIGITS) convdigits = MAX_CONVDIGITS; dft_log("\nConvdigits is %d in Poisson solver.\n"); while(anotherstep) { #ifdef DFT_PROFILING gettimeofday(&time1,NULL); #endif // DFT_PROFILING // Set the previous gradient before we compute a new one. pgrad = grad; pgradmag = gradmag; // Calculate the functional F = (1/2)*p^L(p) - p^q apply_L(output,temp); // temp = L(p) F = (0.5)*(REAL(output^temp)) - (REAL(output^input)); // Calculate the gradient dF = (1/2)*L(p) - (1/2)*q grad = temp; grad -= input; grad *= 0.5; //dft_log("|grad|^2 = %1.12lf\n",REAL(grad^grad)); apply_K(grad,d); // Store grad^grad for the conjugacy parameter. gradmag = REAL(grad^d); if(iter > 0) { // beyond first iteration: // d_n = g_n + (g_n^g_n)/(g_(n-1)^g_(n-1))*d_(n-1) pd *= (gradmag/pgradmag); d += pd; } // Given a search direction, do the line minimization. // p_(n+1) = p_n + lambda*d_n // Compute the fit parameters to the parabola: // F(lambda) = a*lambda^2 + b*lambda + c = 0 // c = F // b = grad^d // a = (1/2)*(d^L(d)) b = 2.0*REAL(grad^d); apply_L(d,temp); a = 0.5*(REAL(d^temp)); // dF/d(lambda) ==> lambda_0 = -b/(2*a) lambda = (-1.0*b)/(2.0*a); // Store the search direction before we change it. pd = d; // Update the output vector. d *= lambda; output += d; iter++; // Let's see how close we are to convergence. digits = (int) floor(-log(1.e-100+sqrt(REAL(grad^grad)))/log(10.)); dft_log("\niteration %d, digits = %d \n",iter,digits); if(digits >= convdigits) anotherstep = 0; // We're done! #ifdef DFT_PROFILING gettimeofday(&time2,NULL); rtimepoi=1.*(time2.tv_sec-time1.tv_sec)+ (time2.tv_usec-time1.tv_usec)/1.e6; rtimepoicum+=rtimepoi; dft_log("Single and total iteration time: %.3f sec %.3f sec\n", rtimepoi,rtimepoicum); #endif // DFT_PROFILING // Let's bail out if we haven't converged in dieiter iterations. if(iter > dieiter) die("invL failed to converge to %d digits in %d iterations!\n", convdigits, dieiter); } dft_log("\n",iter,digits); dft_log_flush(); // Output now contains invL(input). #ifdef DFT_PROFILING timerOff(32); // turn off invL timer #endif // DFT_PROFILING } // This version of apply_invLreal uses only the C-level grid structures, // to save memory. When we construct WL_RealColumn, this will be made // obsolete. -- mhe 3/20/02 void apply_invL_ORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output) { #ifdef DFT_PROFILING timerOn(32); // turn on invL timer #endif // DFT_PROFILING // We have to figure out how to pass this into invL!!! int digits, convdigits, iter = 0, anotherstep = 1; // If we do more iterations than dieiter, we should quit. const int dieiter = 500; double a, b, lambda, gradmag, pgradmag; // WL_ComplexColumn grad(input), d(input), pd(input); Gridspec *gridspec; // Before we use this routine, we'd better make sure that we're actually // operating on a real column! double imagmag = 0.0; for(int ii=0; ii < input.length; ii++) imagmag += fabs(input.data.d[ii].y); if(imagmag > ((1.e-12)*input.length)) { dft_log("\nImaginary magnitude is %1.12lf in solve_poisson.\n",imagmag); die("Are you sure you want to use a complex column here?\n"); } switch(input.embedding){ case SPARSE: gridspec = input.basis_spec->gridspec; break; case DENSE: gridspec = input.basis_spec->gridspec_dense; break; default: die("apply_L: Unknown input embedding %d\n",input.embedding); } // Set up the PCG work grids. Grid *grad = mkgridnew(gridspec,NULL); Grid *d = mkgridnew(gridspec,NULL); Grid *pd = mkgridnew(gridspec,NULL); // Set up a workspace for the L operator. // Declare workspace variables for operators. double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)]; double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)]; real *workspace1, *workspace2; // Allocate workspaces workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace1", "apply_L"); workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace2", "apply_L"); Grid *helpgrid = mkgridnew(gridspec,NULL); // Grab the real part of the input grid. Grid *in = mkgridnew(gridspec,NULL); GetRe(in,input); // Set up a grid for output. Grid *out = mkgridnew(gridspec,NULL); #ifdef DFT_PROFILING // Declare timeval's so that we can time each iteration. timeval time1,time2; double rtimepoi,rtimepoicum=0; #endif // DFT_PROFILING // Start with a zero initial solution. evalgrid(out,NULLDVEC,zero); evalgrid(grad,NULLDVEC,zero); // grad = 0.5*L(output) - 0.5*input = -0.5*input //grad -= input; grad *= 0.5; //scalarmultsubtract(grad,0.5,in); scalarmultcumnew(grad,-0.5,in); // HACK ALERT!!! // Calculate the number of convergence digits using global_abs2Yg. convdigits = 4-(int) floor(log((1.e-100+sqrt(global_abs2Yg)))/log(10.)); if(convdigits > MAX_CONVDIGITS) convdigits = MAX_CONVDIGITS; dft_log("\nConvdigits is %d in Poisson solver.\n",convdigits); //cltime = 0.; while(anotherstep) { #ifdef DFT_PROFILING gettimeofday(&time1,NULL); #endif // DFT_PROFILING // Set the previous gradient before we compute a new one. pgradmag = gradmag; //apply_Kreal(grad,d); copygridnew(d,grad); preconddiag(d,1); // Store grad^grad for the conjugacy parameter. //gradmag = dot(grad,d); gradmag = gridcarrotnew(grad,d); if(iter > 0) { // beyond first iteration: // d_n = g_n + (g_n^g_n)/(g_(n-1)^g_(n-1))*d_(n-1) //pd *= (gradmag/pgradmag); //d += pd; //scalarmultadd(d,(gradmag/pgradmag),pd); scalarmultcumnew(d,(gradmag/pgradmag),pd); } // Given a search direction, do the line minimization. // p_(n+1) = p_n + lambda*d_n // Compute the fit parameters to the parabola: // F(lambda) = a*lambda^2 + b*lambda + c = 0 // c = F // b = grad^d // a = (1/2)*(d^L(d)) b = 2.0*gridcarrotnew(grad,d); // apply_Lreal(d,pd); a = 0.5*(dot(d,pd)); #ifdef DFT_PROFILING timerOn(31); counterIncr(23); #endif // DFT_PROFILING L(pd,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); #ifdef DFT_PROFILING timerOff(31); #endif // DFT_PROFILING a = 0.5*gridcarrotnew(d,pd); // dF/d(lambda) = 0 --> lambda_0 = -b/(2*a) lambda = (-1.0*b)/(2.0*a); //dft_log("%1.12lf\n",lambda); // Set the current gradient to the updated gradient. // Use pd = L(d). //pd *= (0.5*lambda); grad += pd; //scalarmultadd(grad,(0.5*lambda),pd); scalarmultcumnew(grad,(0.5*lambda),pd); // Store the search direction before we change it. //pd = d; copygridnew(pd,d); // Update the output vector. //d *= lambda; output += d; //scalarmultadd(out,lambda,d); scalarmultcumnew(out,lambda,d); // grad now contains L(old_output + lambda*d) - input // Let's see how close we are to convergence. digits = (int) floor(-log(1.e-100+sqrt(gridcarrotnew(grad,grad)))/log(10.)); //dft_log("\niteration %d, digits = %d \n",iter,digits); dft_log("%d ",digits); dft_log_flush(); if(digits >= convdigits) anotherstep = 0; // We're done! iter++; #ifdef DFT_PROFILING gettimeofday(&time2,NULL); rtimepoi=1.*(time2.tv_sec-time1.tv_sec)+ (time2.tv_usec-time1.tv_usec)/1.e6; rtimepoicum+=rtimepoi; // dft_log("\nSingle and total iteration time: %.3f sec %.3f sec\n", // rtimepoi,rtimepoicum); #endif // DFT_PROFILING // Let's bail out if we haven't converged in dieiter iterations. if(iter > dieiter) die("\ninvL failed to converge to %d digits in %d iterations!\n", convdigits, dieiter); } //dft_log("\nTotal time spent in L (C only) = %.3f sec\n",cltime); dft_log("\n"); PutRe(out,output); // Output now contains invL(input). // Free temporaries. myfree(workspace1); myfree(workspace2); killgridnew(out); killgridnew(in); killgridnew(grad); killgridnew(d); killgridnew(pd); killgridnew(helpgrid); #ifdef DFT_PROFILING timerOff(32); // turn off invL timer #endif // DFT_PROFILING } // This version of apply_invLreal uses only the C-level grid structures, // to save memory. When we construct WL_RealColumn, this will be made // obsolete. -- mhe 3/20/02 void apply_invL_NORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output) { #ifdef DFT_PROFILING timerOn(32); // turn on invL timer #endif // DFT_PROFILING // We have to figure out how to pass this into invL!!! int digits, convdigits, iter = 0, anotherstep = 1; // If we do more iterations than dieiter, we should quit. const int dieiter = 500; double a, b, lambda, gradmag, pgradmag; // WL_ComplexColumn grad(input), d(input), pd(input); Gridspec *gridspec; Lattice *lattice=input.basis_spec->lattice; // Before we use this routine, we'd better make sure that we're actually // operating on a real column! double imagmag = 0.0; for(int ii=0; ii < input.length; ii++) imagmag += fabs(input.data.d[ii].y); if(imagmag > ((1.e-12)*input.length)) { dft_log("\nImaginary magnitude is %1.12lf in solve_poisson.\n",imagmag); die("Are you sure you want to use a complex column here?\n"); } switch(input.embedding){ case SPARSE: gridspec = input.basis_spec->gridspec; break; case DENSE: gridspec = input.basis_spec->gridspec_dense; break; default: die("apply_L: Unknown input embedding %d\n",input.embedding); } // Get the grid dimensions. int N1=gridspec[0].dim.x, N2=gridspec[0].dim.y, N3=gridspec[0].dim.z, Ntot=N1*N2*N3; double volfactor=lattice->unit_cell_volume/((double)Ntot*4.0*M_PI*M_PI); // Set up the PCG work grids. Grid *grad = mkgridnew(gridspec,NULL); Grid *d = mkgridnew(gridspec,NULL); Grid *pd = mkgridnew(gridspec,NULL); Grid *tempout = mkgridnew(gridspec,NULL); evalgrid(tempout,NULLDVEC,zero); // Set up a workspace for the L operator. // Declare workspace variables for operators. double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)]; double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)]; real *workspace1, *workspace2; // Allocate workspaces workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace1", "apply_L"); workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace2", "apply_L"); Grid *helpgrid = mkgridnew(gridspec,NULL); // Grab the real part of the input grid. Grid *in = mkgridnew(gridspec,NULL); GetRe(in,input); // Set up a grid for output. Grid *out = mkgridnew(gridspec,NULL); #ifdef DFT_PROFILING // Declare timeval's so that we can time each iteration. timeval time1,time2; double rtimepoi,rtimepoicum=0; #endif // DFT_PROFILING // Start with a zero initial solution. evalgrid(out,NULLDVEC,zero); evalgrid(grad,NULLDVEC,zero); // grad = 0.5*L(output) - 0.5*input = -0.5*input //grad -= input; grad *= 0.5; //scalarmultsubtract(grad,0.5,in); scalarmultcumnew(grad,-0.5,in); // HACK ALERT!!! // Calculate the number of convergence digits using global_abs2Yg. convdigits = 4-(int) floor(log((1.e-100+sqrt(global_abs2Yg)))/log(10.)); if(convdigits > MAX_CONVDIGITS) convdigits = MAX_CONVDIGITS; dft_log("\nConvdigits is %d in Poisson solver.\n",convdigits); //cltime = 0.; while(anotherstep) { #ifdef DFT_PROFILING gettimeofday(&time1,NULL); #endif // DFT_PROFILING // Set the previous gradient before we compute a new one. pgradmag = gradmag; //apply_Kreal(grad,d); copygridnew(d,grad); preconddiag(d,1); // Store grad^grad for the conjugacy parameter. //gradmag = dot(grad,d); gradmag = gridcarrotnew(grad,d); if(iter > 0) { // beyond first iteration: // d_n = g_n + (g_n^g_n)/(g_(n-1)^g_(n-1))*d_(n-1) //pd *= (gradmag/pgradmag); //d += pd; //scalarmultadd(d,(gradmag/pgradmag),pd); scalarmultcumnew(d,(gradmag/pgradmag),pd); } // Given a search direction, do the line minimization. // p_(n+1) = p_n + lambda*d_n // Compute the fit parameters to the parabola: // F(lambda) = a*lambda^2 + b*lambda + c = 0 // c = F // b = grad^d // a = (1/2)*(d^L(d)) b = 2.0*gridcarrotnew(grad,d); // apply_Lreal(d,pd); a = 0.5*(dot(d,pd)); // DEBUG!!! //dft_log("volfactor = %.6lf N1=%d N2=%d N3=%d\n",volfactor,N1,N2,N3); #ifdef DFT_PROFILING timerOn(31); counterIncr(23); #endif // DFT_PROFILING //L(test,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); // Non-orthrhombic L // Do dx*dx. evalgrid(pd,NULLDVEC,zero); if(lattice->GGT.m[0][0] != 0.0) { cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,0); scalarmultcumnew(pd,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)), tempout); } // Do dx*dy. if(lattice->GGT.m[0][1] != 0.0) { cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,1); scalarmultcumnew(pd,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)), tempout); dft_log("got here\n"); } // Do dx*dz. if(lattice->GGT.m[0][2] != 0.0) { cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,0,2); scalarmultcumnew(pd,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)), tempout); dft_log("got here\n"); } // Do dy*dy. if(lattice->GGT.m[1][1] != 0.0) { cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,1,1); scalarmultcumnew(pd,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)), tempout); } // Do dy*dz. if(lattice->GGT.m[1][2] != 0.0) { cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,1,2); scalarmultcumnew(pd,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)), tempout); dft_log("got here\n"); } // Do dz*dz. if(lattice->GGT.m[2][2] != 0.0) { cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls, helpgrid,workspace1,workspace2,2,2); scalarmultcumnew(pd,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)), tempout); } #ifdef DFT_PROFILING timerOff(31); #endif // DFT_PROFILING a = 0.5*gridcarrotnew(d,pd); // dF/d(lambda) = 0 --> lambda_0 = -b/(2*a) lambda = (-1.0*b)/(2.0*a); //dft_log("%1.12lf\n",lambda); // Set the current gradient to the updated gradient. // Use pd = L(d). //pd *= (0.5*lambda); grad += pd; //scalarmultadd(grad,(0.5*lambda),pd); scalarmultcumnew(grad,(0.5*lambda),pd); // Store the search direction before we change it. //pd = d; copygridnew(pd,d); // Update the output vector. //d *= lambda; output += d; //scalarmultadd(out,lambda,d); scalarmultcumnew(out,lambda,d); // grad now contains L(old_output + lambda*d) - input // Let's see how close we are to convergence. digits = (int) floor(-log(1.e-100+sqrt(gridcarrotnew(grad,grad)))/log(10.)); //dft_log("\niteration %d, digits = %d \n",iter,digits); dft_log("%d ",digits); dft_log_flush(); if(digits >= convdigits) anotherstep = 0; // We're done! iter++; #ifdef DFT_PROFILING gettimeofday(&time2,NULL); rtimepoi=1.*(time2.tv_sec-time1.tv_sec)+ (time2.tv_usec-time1.tv_usec)/1.e6; rtimepoicum+=rtimepoi; // dft_log("\nSingle and total iteration time: %.3f sec %.3f sec\n", // rtimepoi,rtimepoicum); #endif // DFT_PROFILING // Let's bail out if we haven't converged in dieiter iterations. if(iter > dieiter) die("\ninvL failed to converge to %d digits in %d iterations!\n", convdigits, dieiter); } //dft_log("\nTotal time spent in L (C only) = %.3f sec\n",cltime); dft_log("\n"); PutRe(out,output); // Output now contains invL(input). // Free temporaries. myfree(workspace1); myfree(workspace2); killgridnew(out); killgridnew(in); killgridnew(grad); killgridnew(d); killgridnew(pd); killgridnew(tempout); killgridnew(helpgrid); #ifdef DFT_PROFILING timerOff(32); // turn off invL timer #endif // DFT_PROFILING } //ipd: this is a hack to matt's nonorthorhombic laplacian so that we can //use different code for the orthorhombic cells void apply_invL(const WL_ComplexColumn &input, WL_ComplexColumn &output) { matrix3 l=input.basis_spec->lattice->GGT; if((l.m[0][1] == 0.0) && (l.m[0][2] == 0.0) && (l.m[1][2] == 0.0)\ && input.basis_spec->use_L_orthorhombic == TRUE ) apply_invL_ORTHORHOMBIC(input,output); else apply_invL_NORTHORHOMBIC(input,output); } void apply_O(const WL_ComplexColumn &input, WL_ComplexColumn &output) { #ifdef DFT_PROFILING timerOn(29); counterIncr(24); #endif // DFT_PROFILING if(input.representation == REALSPACE) dft_log("Trying to take O of realspace bundle.\n"); // Set the representation output.representation=input.representation; if(output.embedding!=input.embedding) die("Cannot do apply_O(%s, %s)! Write me!\n", input.getembedding(), output.getembedding()); Gridspec *gridspec; switch(input.embedding){ case SPARSE: gridspec = input.basis_spec->gridspec; break; case DENSE: gridspec = input.basis_spec->gridspec_dense; break; default: die("apply_L: Unknown input embedding %d\n",input.embedding); } // output.kvec=input.kvec; double kx, ky, kz; if(input.basis->qnum){ // hack it chargedensities don't have qnum kx=input.basis->qnum->kvec.v[0]; ky=input.basis->qnum->kvec.v[1]; kz=input.basis->qnum->kvec.v[2]; } else kx=ky=kz=0.; // Declare workspace variables for operators. double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)]; double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)]; real *workspace1, *workspace2; // Allocate workspaces workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace1", "apply_O"); workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real), "workspace2", "apply_O"); if (kx==0. && ky==0. && kz==0.) { Grid *tempin = mkgridnew(gridspec,NULL); Grid *tempout = mkgridnew(gridspec,NULL); Grid *helpgrid = mkgridnew(gridspec,NULL); GetRe(tempin,input); zeroghostrec(tempin); O(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); PutRe(tempout,output); GetIm(tempin,input); zeroghostrec(tempin); O(tempout,tempin, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); PutIm(tempout,output); killgridnew(tempin); killgridnew(tempout); killgridnew(helpgrid); } else { // create expanded top level to hold the top level with // proper wrapping when going to the neighbouring cells struct Grid FgridinRe, FgridoutRe,FgridinIm, FgridoutIm; struct Ivec xpnd = {HFL_O1, HFL_O1, HFL_O1}; struct Ivec rvrs = {-HFL_O1, -HFL_O1, -HFL_O1}; Grid *inre, *inim, *outre, *outim; inre = mkgridnew(gridspec,NULL); inim = mkgridnew(gridspec,NULL); outre = mkgridnew(gridspec,NULL); outim = mkgridnew(gridspec,NULL); GetRe(inre,input); GetIm(inim,input); FgridinRe.level = 0; FgridinRe.ifperiodic=non_periodic; FgridinRe.org.x = inre->org.x - xpnd.x; FgridinRe.org.y = inre->org.y - xpnd.y; FgridinRe.org.z = inre->org.z - xpnd.z; FgridinRe.dim.x = inre->dim.x + 2*xpnd.x; FgridinRe.dim.y = inre->dim.y + 2*xpnd.y; FgridinRe.dim.z = inre->dim.z + 2*xpnd.z; FgridinRe.scale = inre->scale; FgridinRe.parent = NULL; // it is the top level FgridinRe.nxyz = FgridinRe.dim.x*FgridinRe.dim.y*FgridinRe.dim.z; // FgridinRe.nxyzwholegrid = input.Re.Cgrid[0]->nxyzwholegrid - // input.Re.Cgrid[0]->nxyz + FgridinRe.nxyz; // do all of the above for FgridinIm,FgridoutRe, FgridoutIm FgridinIm=FgridinRe; FgridoutRe=FgridinRe; FgridoutIm=FgridinRe; // allocate the space for the data FgridinRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridinRe", "apply_O"); FgridinIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridinIm", "apply_O"); FgridoutRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridoutRe", "apply_O"); FgridoutIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real), "FgridoutIm", "apply_O"); Grid *helpgrid = mkgridnew(gridspec,NULL); // in order that Oabovenew works correctly helpgrid and ingrid have // to be identical structures on levels below toplevel // we must change the origin of helpgrid as well if(helpgrid->child) setnewparent(helpgrid->child,xpnd,helpgrid); // input.Re.Cgrid[ig]->child child of FgridinRe FgridinRe.child = inre->child; FgridinRe.sibling = inre->sibling; // the same for FgridinIm,FgridoutRe, FgridoutIm FgridinIm.child = inim->child; FgridinIm.sibling = inim->sibling; FgridoutRe.child = outre->child; FgridoutRe.sibling = outre->sibling; FgridoutIm.child = outim->child; FgridoutIm.sibling = outim->sibling; // make FgridinRe parent of input.Re.Cgrid[ig]->child and it siblings if(inre->child){ setnewparent(inre->child,xpnd,&FgridinRe); setnewparent(inim->child,xpnd,&FgridinIm); setnewparent(outre->child,xpnd,&FgridoutRe); setnewparent(outim->child,xpnd,&FgridoutIm); } // copy the data from top level ingrid.Re/Im to FgridinRe/Im Dvec oldk; oldk.x = kx; oldk.y = ky; oldk.z = kz; CopyBlochPad(&FgridinRe, &FgridinIm, inre, inim, oldk, xpnd); zeroghostrec(&FgridinRe); zeroghostrec(&FgridinIm); O(&FgridoutRe,&FgridinRe, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); O(&FgridoutIm,&FgridinIm, O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2); //zeroghostrec(out) is done inside L() // copy the data from FgridoutRe/Im top level back to // output.Re/Im.Cgrid[ig] CopyfromPadded(outre, &FgridoutRe, xpnd); CopyfromPadded(outim, &FgridoutIm, xpnd); // return the original parents of input.Re.Cgrid[ig] and // output.Re.Cgrid[i]; if(inre->child){ setnewparent(inre->child,rvrs,inre); setnewparent(inim->child,rvrs,inim); setnewparent(outre->child,rvrs,outre); setnewparent(outim->child,rvrs,outim); } // Put the grids back. PutRe(outre,output); PutIm(outim,output); // free the space myfree(FgridinRe.dat); myfree(FgridinIm.dat); myfree(FgridoutRe.dat); myfree(FgridoutIm.dat); // restore the origin of helpgrid as it was initially setnewparent(helpgrid->child,rvrs,helpgrid); // Free the Grid's. killgridnew(inre); killgridnew(inim); killgridnew(outre); killgridnew(outim); killgridnew(helpgrid); } myfree(workspace1); myfree(workspace2); #ifdef DFT_PROFILING timerOff(29); // turn off O timer #endif // DFT_PROFILING } void apply_Obar(const WL_ComplexColumn &input, WL_ComplexColumn &output) { static WL_ComplexColumn s(input); static int get_s = 1; static real omega = 0; // Only compute s once. if(get_s){ // S = O(J(1)) WL_ComplexColumn ones(input.basis); ones.init_scalarfield(*(input.basis), REALSPACE); ones.setones(); apply_J(ones,ones); apply_O(ones,s); // Get the unit cell volumn. omega = REAL(s^ones); //dft_log("Unit cell volume from Obar = %1.12lf\n",omega); get_s = 0; } // Obar = O - (1/vol)(s s^) double volfactor = (REAL(s^input))/omega; apply_O(input,output); //dft_log("integral(n) = %1.12lf\n",volfactor*omega); output -= volfactor*s; return; }