/*
DFT++ is a density functional package developed by the research group
of Professor Tomas Arias
Copyright 1996-2003 Sohrab Ismail-Beigi
This file is part of DFT++.
DFT++ is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DFT++ is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DFT++; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Please see the file CREDITS for a list of authors.
For academic users, we request that publications using results obtained with
this software reference
"New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).
and, if using the wavelet basis, further reference
"Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).
and
"Robust ab initio calculation of condensed matter: transparent convergence through
semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).
For your convenience, preprints of the above articles may be obtained from
http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/
#include "header.h"
// The maximum number of digits to which we can converge invL.
#define MAX_CONVDIGITS 11
void
CopyBlochPad(struct Grid *outre, struct Grid *outim,
struct Grid *inre, struct Grid *inim,
struct Dvec kvec, struct Ivec pad);
void
CopyfromPadded(struct Grid *output, struct Grid *input, struct Ivec pad);
void
apply_L_ORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
#ifdef DFT_PROFILING
timerOn(31); // turn on L timer
counterIncr(23);
#endif // DFT_PROFILING
if(input.representation == REALSPACE)
die("Trying to take L of realspace bundle.\n");
// Set the representation and quantum numbers of the output.
output.representation=input.representation;
if(output.embedding!=input.embedding)
die("Cannot do apply_L(%s, %s)! Write me!\n",
input.getembedding(), output.getembedding());
//get the gridspecs of the input (shouild be the same as output)
Gridspec *gridspec;
switch(input.embedding){
case SPARSE:
gridspec = input.basis_spec->gridspec;
break;
case DENSE:
gridspec = input.basis_spec->gridspec_dense;
break;
default:
die("apply_L: Unknown input embedding %d\n",input.embedding);
}
// output.kvec=input.kvec;
double kx, ky, kz;
if(input.basis->qnum){ // hack it chargedensities don't have qnum
kx=input.basis->qnum->kvec.v[0];
ky=input.basis->qnum->kvec.v[1];
kz=input.basis->qnum->kvec.v[2];
}
else kx=ky=kz=0.;
//dft_log("USING kpt: %lf %lf %lf\n",kx, ky, kz);
// Declare workspace variables for operators.
double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)];
double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)];
real *workspace1, *workspace2;
// Allocate workspaces
workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace1", "apply_L");
workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace2", "apply_L");
if (kx==0. && ky==0. && kz==0.) {
// no need of bloch phase
// Do the real part.
Grid *tempin = mkgridnew(gridspec,NULL);
Grid *tempout = mkgridnew(gridspec,NULL);
Grid *helpgrid = mkgridnew(gridspec,NULL);
GetRe(tempin,input);
zeroghostrec(tempin);
L(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
PutRe(tempout,output);
// Now the imaginary part/
GetIm(tempin,input);
zeroghostrec(tempin);
L(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
PutIm(tempout,output);
// Free the memory allocated to the grids.
killgridnew(tempin);
killgridnew(tempout);
killgridnew(helpgrid);
}
else {
// Here, we need two helper grids, since CopyBlochPad needs to deal with
// real and imaginary parts simultaneously.
Grid *input_re, *input_im, *output_re, *output_im;
input_re = mkgridnew(gridspec,NULL);
input_im = mkgridnew(gridspec,NULL);
output_re = mkgridnew(gridspec,NULL);
output_im = mkgridnew(gridspec,NULL);
GetRe(input_re,input);
GetIm(input_im,input);
// create expanded top level to hold the top level with
// proper wrapping when going to the neighbouring cells
struct Grid FgridinRe, FgridoutRe,FgridinIm, FgridoutIm;
struct Ivec xpnd = {HFL_L1, HFL_L1, HFL_L1};
struct Ivec rvrs = {-HFL_L1, -HFL_L1, -HFL_L1};
FgridinRe.level = 0;
// since Fgrid has the pieces from the neighbouring cells we
// do nonperiodic calculation
FgridinRe.ifperiodic=non_periodic;
FgridinRe.org.x = input_re->org.x - xpnd.x;
FgridinRe.org.y = input_re->org.y - xpnd.y;
FgridinRe.org.z = input_re->org.z - xpnd.z;
FgridinRe.dim.x = input_re->dim.x + 2*xpnd.x;
FgridinRe.dim.y = input_re->dim.y + 2*xpnd.y;
FgridinRe.dim.z = input_re->dim.z + 2*xpnd.z;
FgridinRe.scale = input_re->scale;
FgridinRe.parent = NULL; // it is the top level
FgridinRe.nxyz = FgridinRe.dim.x*FgridinRe.dim.y*FgridinRe.dim.z;
FgridinRe.nxyzwholegrid = -100000;// meaningles, not bgrid
// do all of the above for FgridinIm,FgridoutRe, FgridoutIm
FgridinIm=FgridinRe;
FgridoutRe=FgridinRe;
FgridoutIm=FgridinRe;
// allocate the space for the data
FgridinRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridinRe", "apply_L");
FgridinIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridinIm", "apply_L");
FgridoutRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridoutRe", "apply_L");
FgridoutIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridoutIm", "apply_L");
Grid *helpgrid = mkgridnew(gridspec,NULL);
// in order that Labovenew works correctly helpgrid and ingrid have
// to be identical structures on levels below toplevel
// we must change the origin of helpgrid as well
if(helpgrid->child)
setnewparent(helpgrid->child,xpnd,helpgrid);
// input.Re.Cgrid[ig]->child child of FgridinRe
FgridinRe.child = input_re->child;
FgridinRe.sibling = input_re->sibling;
// the same for FgridinIm,FgridoutRe, FgridoutIm
FgridinIm.child = input_im->child;
FgridinIm.sibling = input_im->sibling;
FgridoutRe.child = output_re->child;
FgridoutRe.sibling = output_re->sibling;
FgridoutIm.child = output_im->child;
FgridoutIm.sibling = output_im->sibling;
// make Fgrid parent of Xput.Cgrid[ig]->child and it siblings
if(input_re->child) {
setnewparent(input_re->child,xpnd,&FgridinRe);
setnewparent(input_im->child,xpnd,&FgridinIm);
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
// Copy the data to the padded grid w/ the appropriate
// boundary conditions
// The convention in the wavelet code is that the k-points should be
// in reciprocal coordinates. Here, however, the k-points are in
// fractional coordinates. CopyBlochPad converts them.
Dvec oldk;
oldk.x = kx;
oldk.y = ky;
oldk.z = kz;
CopyBlochPad(&FgridinRe, &FgridinIm, input_re, input_im, oldk, xpnd);
zeroghostrec(&FgridinRe);
zeroghostrec(&FgridinIm);
L(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2);
L(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2);
//zeroghostrec(out) is done inside L()
// copy the data from FgridoutRe/Im top level back to
// output.Re/Im.Cgrid[ig]
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
// return the original parents of input.Re.Cgrid[ig] and
// output.Re.Cgrid[i];
if(input_re->child) {
setnewparent(input_re->child,rvrs,input_re);
setnewparent(input_im->child,rvrs,input_im);
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Put the gridclasses back.
PutRe(output_re,output);
PutIm(output_im,output);
// free the memory of the fake top levels
myfree(FgridinRe.dat);
myfree(FgridinIm.dat);
myfree(FgridoutRe.dat);
myfree(FgridoutIm.dat);
// restore the origin of helpgrid as it was initially
setnewparent(helpgrid->child,rvrs,helpgrid);
// Free the Grid's.
killgridnew(input_re); killgridnew(input_im);
killgridnew(output_re); killgridnew(output_im);
killgridnew(helpgrid);
}
myfree(workspace1); myfree(workspace2);
#ifdef DFT_PROFILING
timerOff(31); // turn off L timer
#endif // DFT_PROFILING
}
// Works with non-orthorhombic cells! -- mhe 10/23/02
void
apply_L_NORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
#ifdef DFT_PROFILING
timerOn(31); // turn on L timer
counterIncr(23);
#endif // DFT_PROFILING
Lattice *lattice=input.basis_spec->lattice;
if(input.representation == REALSPACE)
die("Trying to take L of realspace bundle.\n");
// Set the representation and quantum numbers of the output.
output.representation=input.representation;
if(output.embedding!=input.embedding)
die("Cannot do apply_L(%s, %s)! Write me!\n",
input.getembedding(), output.getembedding());
//get the gridspecs of the input (shouild be the same as output)
Gridspec *gridspec;
switch(input.embedding){
case SPARSE:
gridspec = input.basis_spec->gridspec;
break;
case DENSE:
gridspec = input.basis_spec->gridspec_dense;
break;
default:
die("apply_L: Unknown input embedding %d\n",input.embedding);
}
// output.kvec=input.kvec;
double kx, ky, kz;
if(input.basis->qnum){ // hack it chargedensities don't have qnum
kx=input.basis->qnum->kvec.v[0];
ky=input.basis->qnum->kvec.v[1];
kz=input.basis->qnum->kvec.v[2];
}
else kx=ky=kz=0.;
//dft_log("USING kpt: %lf %lf %lf\n",kx, ky, kz);
// Get the grid dimensions.
int N1=gridspec[0].dim.x, N2=gridspec[0].dim.y, N3=gridspec[0].dim.z,
Ntot=N1*N2*N3;
double volfactor=lattice->unit_cell_volume/((double)Ntot*4*M_PI*M_PI);
// Declare workspace variables for operators.
double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)];
double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)];
real *workspace1, *workspace2;
// Allocate workspaces
workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace1", "apply_L");
workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace2", "apply_L");
if (kx==0. && ky==0. && kz==0.) {
// no need of bloch phase
// Do the real part.
Grid *tempin = mkgridnew(gridspec,NULL);
Grid *tempout = mkgridnew(gridspec,NULL);
Grid *helpgrid = mkgridnew(gridspec,NULL);
// This grid holds the sum of the various components of L.
Grid *sum = mkgridnew(gridspec,NULL);
evalgrid(sum,NULLDVEC,zero);
GetRe(tempin,input);
zeroghostrec(tempin);
// Do dx*dx.
if(lattice->GGT.m[0][0] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,0);
scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)),
tempout);
}
// Do dx*dy.
if(lattice->GGT.m[0][1] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,1);
scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)),
tempout);
}
// Do dx*dz.
if(lattice->GGT.m[0][2] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,2);
scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)),
tempout);
}
// Do dy*dy.
if(lattice->GGT.m[1][1] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,1);
scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)),
tempout);
}
// Do dy*dz.
if(lattice->GGT.m[1][2] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,2);
scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)),
tempout);
}
// Do dz*dz.
if(lattice->GGT.m[2][2] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,2,2);
scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)),
tempout);
}
// Finish up.
PutRe(sum,output);
// Now the imaginary part
evalgrid(sum,NULLDVEC,zero);
GetIm(tempin,input);
zeroghostrec(tempin);
// Do dx*dx.
if(lattice->GGT.m[0][0] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,0);
scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)),
tempout);
}
// Do dx*dy.
if(lattice->GGT.m[0][1] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,1);
scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)),
tempout);
}
// Do dx*dz.
if(lattice->GGT.m[0][2] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,0,2);
scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)),
tempout);
}
// Do dy*dy.
if(lattice->GGT.m[1][1] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,1);
scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)),
tempout);
}
// Do dy*dz.
if(lattice->GGT.m[1][2] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,1,2);
scalarmultcumnew(sum,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)),
tempout);
}
// Do dz*dz.
if(lattice->GGT.m[2][2] != 0.0)
{
cD(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2,2,2);
scalarmultcumnew(sum,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)),
tempout);
}
// Finish up.
PutIm(sum,output);
// Free the memory allocated to the grids.
killgridnew(tempin);
killgridnew(tempout);
killgridnew(helpgrid);
killgridnew(sum);
}
else {
// Here, we need two helper grids, since CopyBlochPad needs to deal with
// real and imaginary parts simultaneously.
Grid *input_re, *input_im, *output_re, *output_im, *sum_re, *sum_im;
input_re = mkgridnew(gridspec,NULL);
input_im = mkgridnew(gridspec,NULL);
output_re = mkgridnew(gridspec,NULL);
output_im = mkgridnew(gridspec,NULL);
sum_re = mkgridnew(gridspec,NULL);
sum_im = mkgridnew(gridspec,NULL);
evalgrid(sum_re,NULLDVEC,zero);
evalgrid(sum_im,NULLDVEC,zero);
GetRe(input_re,input);
GetIm(input_im,input);
// create expanded top level to hold the top level with
// proper wrapping when going to the neighbouring cells
struct Grid FgridinRe, FgridoutRe,FgridinIm, FgridoutIm;
struct Ivec xpnd = {HFL_L1, HFL_L1, HFL_L1};
struct Ivec rvrs = {-HFL_L1, -HFL_L1, -HFL_L1};
FgridinRe.level = 0;
// since Fgrid has the pieces from the neighbouring cells we
// do nonperiodic calculation
FgridinRe.ifperiodic=non_periodic;
FgridinRe.org.x = input_re->org.x - xpnd.x;
FgridinRe.org.y = input_re->org.y - xpnd.y;
FgridinRe.org.z = input_re->org.z - xpnd.z;
FgridinRe.dim.x = input_re->dim.x + 2*xpnd.x;
FgridinRe.dim.y = input_re->dim.y + 2*xpnd.y;
FgridinRe.dim.z = input_re->dim.z + 2*xpnd.z;
FgridinRe.scale = input_re->scale;
FgridinRe.parent = NULL; // it is the top level
FgridinRe.nxyz = FgridinRe.dim.x*FgridinRe.dim.y*FgridinRe.dim.z;
FgridinRe.nxyzwholegrid = -100000;// meaningles, not bgrid
// do all of the above for FgridinIm,FgridoutRe, FgridoutIm
FgridinIm=FgridinRe;
FgridoutRe=FgridinRe;
FgridoutIm=FgridinRe;
// allocate the space for the data
FgridinRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridinRe", "apply_L");
FgridinIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridinIm", "apply_L");
FgridoutRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridoutRe", "apply_L");
FgridoutIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridoutIm", "apply_L");
Grid *helpgrid = mkgridnew(gridspec,NULL);
// in order that Labovenew works correctly helpgrid and ingrid have
// to be identical structures on levels below toplevel
// we must change the origin of helpgrid as well
if(helpgrid->child)
setnewparent(helpgrid->child,xpnd,helpgrid);
// input.Re.Cgrid[ig]->child child of FgridinRe
FgridinRe.child = input_re->child;
FgridinRe.sibling = input_re->sibling;
// the same for FgridinIm,FgridoutRe, FgridoutIm
FgridinIm.child = input_im->child;
FgridinIm.sibling = input_im->sibling;
FgridoutRe.child = output_re->child;
FgridoutRe.sibling = output_re->sibling;
FgridoutIm.child = output_im->child;
FgridoutIm.sibling = output_im->sibling;
// make Fgrid parent of Xput.Cgrid[ig]->child and it siblings
if(input_re->child) {
setnewparent(input_re->child,xpnd,&FgridinRe);
setnewparent(input_im->child,xpnd,&FgridinIm);
}
// Copy the data to the padded grid w/ the appropriate
// boundary conditions
// The convention in the wavelet code is that the k-points should be
// in reciprocal coordinates. Here, however, the k-points are in
// fractional coordinates. CopyBlochPad converts them.
Dvec oldk;
oldk.x = kx;
oldk.y = ky;
oldk.z = kz;
CopyBlochPad(&FgridinRe, &FgridinIm, input_re, input_im, oldk, xpnd);
zeroghostrec(&FgridinRe);
zeroghostrec(&FgridinIm);
// Do dx*dx.
if(lattice->GGT.m[0][0] != 0.0)
{
if(output_re->child)
{
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
cD(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,0);
cD(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,0);
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
if(output_re->child) {
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Add to sum.
scalarmultcumnew(sum_re,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)),
output_re);
scalarmultcumnew(sum_im,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)),
output_im);
}
// Do dx*dy.
if(lattice->GGT.m[0][1] != 0.0)
{
if(output_re->child)
{
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
cD(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,1);
cD(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,1);
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
if(output_re->child) {
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Add to sum.
scalarmultcumnew(sum_re,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)),
output_re);
scalarmultcumnew(sum_im,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)),
output_im);
}
// Do dx*dz.
if(lattice->GGT.m[0][2] != 0.0)
{
if(output_re->child)
{
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
cD(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,2);
cD(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,2);
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
if(output_re->child) {
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Add to sum.
scalarmultcumnew(sum_re,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)),
output_re);
scalarmultcumnew(sum_im,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)),
output_im);
}
// Do dy*dy.
if(lattice->GGT.m[1][1] != 0.0)
{
if(output_re->child)
{
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
cD(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,1,1);
cD(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,1,1);
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
if(output_re->child) {
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Add to sum.
scalarmultcumnew(sum_re,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)),
output_re);
scalarmultcumnew(sum_im,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)),
output_im);
}
// Do dy*dz.
if(lattice->GGT.m[1][2] != 0.0)
{
if(output_re->child)
{
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
cD(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,1,2);
cD(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,1,2);
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
if(output_re->child) {
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Add to sum.
scalarmultcumnew(sum_re,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)),
output_re);
scalarmultcumnew(sum_im,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)),
output_im);
}
// Do dz*dz.
if(lattice->GGT.m[2][2] != 0.0)
{
if(output_re->child)
{
setnewparent(output_re->child,xpnd,&FgridoutRe);
setnewparent(output_im->child,xpnd,&FgridoutIm);
}
cD(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,2,2);
cD(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,2,2);
CopyfromPadded(output_re, &FgridoutRe, xpnd);
CopyfromPadded(output_im, &FgridoutIm, xpnd);
if(output_re->child) {
setnewparent(output_re->child,rvrs,output_re);
setnewparent(output_im->child,rvrs,output_im);
}
// Add to sum.
scalarmultcumnew(sum_re,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)),
output_re);
scalarmultcumnew(sum_im,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)),
output_im);
}
if(input_re->child)
{
setnewparent(input_re->child,rvrs,input_re);
setnewparent(input_im->child,rvrs,input_im);
}
// Put the gridclasses back.
PutRe(sum_re,output);
PutIm(sum_im,output);
// free the memory of the fake top levels
myfree(FgridinRe.dat);
myfree(FgridinIm.dat);
myfree(FgridoutRe.dat);
myfree(FgridoutIm.dat);
// restore the origin of helpgrid as it was initially
setnewparent(helpgrid->child,rvrs,helpgrid);
// Free the Grid's.
killgridnew(input_re); killgridnew(input_im);
killgridnew(output_re); killgridnew(output_im);
killgridnew(sum_re); killgridnew(sum_im);
killgridnew(helpgrid);
}
myfree(workspace1); myfree(workspace2);
#ifdef DFT_PROFILING
timerOff(31); // turn off L timer
#endif // DFT_PROFILING
}
//ipd: this is a hack to matt's nonorthorhombic laplacian so that we can
//use different code for the orthorhombic cells
void
apply_L(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
matrix3 l=input.basis_spec->lattice->GGT;
if((l.m[0][1] == 0.0) && (l.m[0][2] == 0.0) && (l.m[1][2] == 0.0)\
&& (input.basis_spec->use_L_orthorhombic == TRUE))
apply_L_ORTHORHOMBIC(input,output);
else
apply_L_NORTHORHOMBIC(input,output);
}
// The invL operator ( p = invL(q) ) is obtained via a variational solution of:
// L(p) = q, minimizing F[p] = (1/2)*p^L(p) - p^q
// note: q = input, p = output
// We assume that the input column has zero total integral! (constants are
// in the null space of L)
// This is the old version, whose algorithm doesn't exploit linearity, and
// which uses complex operators and temporaries.
void
apply_invL_OLD(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
#ifdef DFT_PROFILING
timerOn(32); // turn on invL timer
#endif // DFT_PROFILING
// We have to figure out how to pass this into invL!!!
int digits, convdigits, iter = 0, anotherstep = 1;
// If we do more iterations than dieiter, we should quit.
const int dieiter = 200;
double F, a, b, lambda, gradmag, pgradmag;
WL_ComplexColumn grad(input), pgrad(input), temp(input),
d(input), pd(input);
#ifdef DFT_PROFILING
// Declare timeval's so that we can time each iteration.
timeval time1,time2;
double rtimepoi,rtimepoicum=0;
#endif // DFT_PROFILING
// Start with a zero initial solution.
output.zero_out();
// HACK ALERT!!!
// Calculate the number of convergence digits using global_abs2Yg.
convdigits = 4-(int) floor(log((1.e-100+sqrt(global_abs2Yg)))/log(10.));
if(convdigits > MAX_CONVDIGITS)
convdigits = MAX_CONVDIGITS;
dft_log("\nConvdigits is %d in Poisson solver.\n");
while(anotherstep)
{
#ifdef DFT_PROFILING
gettimeofday(&time1,NULL);
#endif // DFT_PROFILING
// Set the previous gradient before we compute a new one.
pgrad = grad;
pgradmag = gradmag;
// Calculate the functional F = (1/2)*p^L(p) - p^q
apply_L(output,temp); // temp = L(p)
F = (0.5)*(REAL(output^temp)) - (REAL(output^input));
// Calculate the gradient dF = (1/2)*L(p) - (1/2)*q
grad = temp; grad -= input; grad *= 0.5;
//dft_log("|grad|^2 = %1.12lf\n",REAL(grad^grad));
apply_K(grad,d);
// Store grad^grad for the conjugacy parameter.
gradmag = REAL(grad^d);
if(iter > 0)
{
// beyond first iteration:
// d_n = g_n + (g_n^g_n)/(g_(n-1)^g_(n-1))*d_(n-1)
pd *= (gradmag/pgradmag);
d += pd;
}
// Given a search direction, do the line minimization.
// p_(n+1) = p_n + lambda*d_n
// Compute the fit parameters to the parabola:
// F(lambda) = a*lambda^2 + b*lambda + c = 0
// c = F
// b = grad^d
// a = (1/2)*(d^L(d))
b = 2.0*REAL(grad^d);
apply_L(d,temp); a = 0.5*(REAL(d^temp));
// dF/d(lambda) ==> lambda_0 = -b/(2*a)
lambda = (-1.0*b)/(2.0*a);
// Store the search direction before we change it.
pd = d;
// Update the output vector.
d *= lambda; output += d;
iter++;
// Let's see how close we are to convergence.
digits = (int) floor(-log(1.e-100+sqrt(REAL(grad^grad)))/log(10.));
dft_log("\niteration %d, digits = %d \n",iter,digits);
if(digits >= convdigits)
anotherstep = 0; // We're done!
#ifdef DFT_PROFILING
gettimeofday(&time2,NULL);
rtimepoi=1.*(time2.tv_sec-time1.tv_sec)+
(time2.tv_usec-time1.tv_usec)/1.e6;
rtimepoicum+=rtimepoi;
dft_log("Single and total iteration time: %.3f sec %.3f sec\n",
rtimepoi,rtimepoicum);
#endif // DFT_PROFILING
// Let's bail out if we haven't converged in dieiter iterations.
if(iter > dieiter)
die("invL failed to converge to %d digits in %d iterations!\n",
convdigits, dieiter);
}
dft_log("\n",iter,digits); dft_log_flush();
// Output now contains invL(input).
#ifdef DFT_PROFILING
timerOff(32); // turn off invL timer
#endif // DFT_PROFILING
}
// This version of apply_invLreal uses only the C-level grid structures,
// to save memory. When we construct WL_RealColumn, this will be made
// obsolete. -- mhe 3/20/02
void
apply_invL_ORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
#ifdef DFT_PROFILING
timerOn(32); // turn on invL timer
#endif // DFT_PROFILING
// We have to figure out how to pass this into invL!!!
int digits, convdigits, iter = 0, anotherstep = 1;
// If we do more iterations than dieiter, we should quit.
const int dieiter = 500;
double a, b, lambda, gradmag, pgradmag;
// WL_ComplexColumn grad(input), d(input), pd(input);
Gridspec *gridspec;
// Before we use this routine, we'd better make sure that we're actually
// operating on a real column!
double imagmag = 0.0;
for(int ii=0; ii < input.length; ii++)
imagmag += fabs(input.data.d[ii].y);
if(imagmag > ((1.e-12)*input.length))
{
dft_log("\nImaginary magnitude is %1.12lf in solve_poisson.\n",imagmag);
die("Are you sure you want to use a complex column here?\n");
}
switch(input.embedding){
case SPARSE:
gridspec = input.basis_spec->gridspec;
break;
case DENSE:
gridspec = input.basis_spec->gridspec_dense;
break;
default:
die("apply_L: Unknown input embedding %d\n",input.embedding);
}
// Set up the PCG work grids.
Grid *grad = mkgridnew(gridspec,NULL);
Grid *d = mkgridnew(gridspec,NULL);
Grid *pd = mkgridnew(gridspec,NULL);
// Set up a workspace for the L operator.
// Declare workspace variables for operators.
double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)];
double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)];
real *workspace1, *workspace2;
// Allocate workspaces
workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace1", "apply_L");
workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace2", "apply_L");
Grid *helpgrid = mkgridnew(gridspec,NULL);
// Grab the real part of the input grid.
Grid *in = mkgridnew(gridspec,NULL);
GetRe(in,input);
// Set up a grid for output.
Grid *out = mkgridnew(gridspec,NULL);
#ifdef DFT_PROFILING
// Declare timeval's so that we can time each iteration.
timeval time1,time2;
double rtimepoi,rtimepoicum=0;
#endif // DFT_PROFILING
// Start with a zero initial solution.
evalgrid(out,NULLDVEC,zero);
evalgrid(grad,NULLDVEC,zero);
// grad = 0.5*L(output) - 0.5*input = -0.5*input
//grad -= input; grad *= 0.5;
//scalarmultsubtract(grad,0.5,in);
scalarmultcumnew(grad,-0.5,in);
// HACK ALERT!!!
// Calculate the number of convergence digits using global_abs2Yg.
convdigits = 4-(int) floor(log((1.e-100+sqrt(global_abs2Yg)))/log(10.));
if(convdigits > MAX_CONVDIGITS)
convdigits = MAX_CONVDIGITS;
dft_log("\nConvdigits is %d in Poisson solver.\n",convdigits);
//cltime = 0.;
while(anotherstep)
{
#ifdef DFT_PROFILING
gettimeofday(&time1,NULL);
#endif // DFT_PROFILING
// Set the previous gradient before we compute a new one.
pgradmag = gradmag;
//apply_Kreal(grad,d);
copygridnew(d,grad);
preconddiag(d,1);
// Store grad^grad for the conjugacy parameter.
//gradmag = dot(grad,d);
gradmag = gridcarrotnew(grad,d);
if(iter > 0)
{
// beyond first iteration:
// d_n = g_n + (g_n^g_n)/(g_(n-1)^g_(n-1))*d_(n-1)
//pd *= (gradmag/pgradmag);
//d += pd;
//scalarmultadd(d,(gradmag/pgradmag),pd);
scalarmultcumnew(d,(gradmag/pgradmag),pd);
}
// Given a search direction, do the line minimization.
// p_(n+1) = p_n + lambda*d_n
// Compute the fit parameters to the parabola:
// F(lambda) = a*lambda^2 + b*lambda + c = 0
// c = F
// b = grad^d
// a = (1/2)*(d^L(d))
b = 2.0*gridcarrotnew(grad,d);
// apply_Lreal(d,pd); a = 0.5*(dot(d,pd));
#ifdef DFT_PROFILING
timerOn(31);
counterIncr(23);
#endif // DFT_PROFILING
L(pd,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
#ifdef DFT_PROFILING
timerOff(31);
#endif // DFT_PROFILING
a = 0.5*gridcarrotnew(d,pd);
// dF/d(lambda) = 0 --> lambda_0 = -b/(2*a)
lambda = (-1.0*b)/(2.0*a);
//dft_log("%1.12lf\n",lambda);
// Set the current gradient to the updated gradient.
// Use pd = L(d).
//pd *= (0.5*lambda); grad += pd;
//scalarmultadd(grad,(0.5*lambda),pd);
scalarmultcumnew(grad,(0.5*lambda),pd);
// Store the search direction before we change it.
//pd = d;
copygridnew(pd,d);
// Update the output vector.
//d *= lambda; output += d;
//scalarmultadd(out,lambda,d);
scalarmultcumnew(out,lambda,d);
// grad now contains L(old_output + lambda*d) - input
// Let's see how close we are to convergence.
digits = (int) floor(-log(1.e-100+sqrt(gridcarrotnew(grad,grad)))/log(10.));
//dft_log("\niteration %d, digits = %d \n",iter,digits);
dft_log("%d ",digits); dft_log_flush();
if(digits >= convdigits)
anotherstep = 0; // We're done!
iter++;
#ifdef DFT_PROFILING
gettimeofday(&time2,NULL);
rtimepoi=1.*(time2.tv_sec-time1.tv_sec)+
(time2.tv_usec-time1.tv_usec)/1.e6;
rtimepoicum+=rtimepoi;
// dft_log("\nSingle and total iteration time: %.3f sec %.3f sec\n",
// rtimepoi,rtimepoicum);
#endif // DFT_PROFILING
// Let's bail out if we haven't converged in dieiter iterations.
if(iter > dieiter)
die("\ninvL failed to converge to %d digits in %d iterations!\n",
convdigits, dieiter);
}
//dft_log("\nTotal time spent in L (C only) = %.3f sec\n",cltime);
dft_log("\n");
PutRe(out,output);
// Output now contains invL(input).
// Free temporaries.
myfree(workspace1); myfree(workspace2);
killgridnew(out); killgridnew(in);
killgridnew(grad); killgridnew(d); killgridnew(pd);
killgridnew(helpgrid);
#ifdef DFT_PROFILING
timerOff(32); // turn off invL timer
#endif // DFT_PROFILING
}
// This version of apply_invLreal uses only the C-level grid structures,
// to save memory. When we construct WL_RealColumn, this will be made
// obsolete. -- mhe 3/20/02
void
apply_invL_NORTHORHOMBIC(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
#ifdef DFT_PROFILING
timerOn(32); // turn on invL timer
#endif // DFT_PROFILING
// We have to figure out how to pass this into invL!!!
int digits, convdigits, iter = 0, anotherstep = 1;
// If we do more iterations than dieiter, we should quit.
const int dieiter = 500;
double a, b, lambda, gradmag, pgradmag;
// WL_ComplexColumn grad(input), d(input), pd(input);
Gridspec *gridspec;
Lattice *lattice=input.basis_spec->lattice;
// Before we use this routine, we'd better make sure that we're actually
// operating on a real column!
double imagmag = 0.0;
for(int ii=0; ii < input.length; ii++)
imagmag += fabs(input.data.d[ii].y);
if(imagmag > ((1.e-12)*input.length))
{
dft_log("\nImaginary magnitude is %1.12lf in solve_poisson.\n",imagmag);
die("Are you sure you want to use a complex column here?\n");
}
switch(input.embedding){
case SPARSE:
gridspec = input.basis_spec->gridspec;
break;
case DENSE:
gridspec = input.basis_spec->gridspec_dense;
break;
default:
die("apply_L: Unknown input embedding %d\n",input.embedding);
}
// Get the grid dimensions.
int N1=gridspec[0].dim.x, N2=gridspec[0].dim.y, N3=gridspec[0].dim.z,
Ntot=N1*N2*N3;
double volfactor=lattice->unit_cell_volume/((double)Ntot*4.0*M_PI*M_PI);
// Set up the PCG work grids.
Grid *grad = mkgridnew(gridspec,NULL);
Grid *d = mkgridnew(gridspec,NULL);
Grid *pd = mkgridnew(gridspec,NULL);
Grid *tempout = mkgridnew(gridspec,NULL);
evalgrid(tempout,NULLDVEC,zero);
// Set up a workspace for the L operator.
// Declare workspace variables for operators.
double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)];
double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)];
real *workspace1, *workspace2;
// Allocate workspaces
workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace1", "apply_L");
workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace2", "apply_L");
Grid *helpgrid = mkgridnew(gridspec,NULL);
// Grab the real part of the input grid.
Grid *in = mkgridnew(gridspec,NULL);
GetRe(in,input);
// Set up a grid for output.
Grid *out = mkgridnew(gridspec,NULL);
#ifdef DFT_PROFILING
// Declare timeval's so that we can time each iteration.
timeval time1,time2;
double rtimepoi,rtimepoicum=0;
#endif // DFT_PROFILING
// Start with a zero initial solution.
evalgrid(out,NULLDVEC,zero);
evalgrid(grad,NULLDVEC,zero);
// grad = 0.5*L(output) - 0.5*input = -0.5*input
//grad -= input; grad *= 0.5;
//scalarmultsubtract(grad,0.5,in);
scalarmultcumnew(grad,-0.5,in);
// HACK ALERT!!!
// Calculate the number of convergence digits using global_abs2Yg.
convdigits = 4-(int) floor(log((1.e-100+sqrt(global_abs2Yg)))/log(10.));
if(convdigits > MAX_CONVDIGITS)
convdigits = MAX_CONVDIGITS;
dft_log("\nConvdigits is %d in Poisson solver.\n",convdigits);
//cltime = 0.;
while(anotherstep)
{
#ifdef DFT_PROFILING
gettimeofday(&time1,NULL);
#endif // DFT_PROFILING
// Set the previous gradient before we compute a new one.
pgradmag = gradmag;
//apply_Kreal(grad,d);
copygridnew(d,grad);
preconddiag(d,1);
// Store grad^grad for the conjugacy parameter.
//gradmag = dot(grad,d);
gradmag = gridcarrotnew(grad,d);
if(iter > 0)
{
// beyond first iteration:
// d_n = g_n + (g_n^g_n)/(g_(n-1)^g_(n-1))*d_(n-1)
//pd *= (gradmag/pgradmag);
//d += pd;
//scalarmultadd(d,(gradmag/pgradmag),pd);
scalarmultcumnew(d,(gradmag/pgradmag),pd);
}
// Given a search direction, do the line minimization.
// p_(n+1) = p_n + lambda*d_n
// Compute the fit parameters to the parabola:
// F(lambda) = a*lambda^2 + b*lambda + c = 0
// c = F
// b = grad^d
// a = (1/2)*(d^L(d))
b = 2.0*gridcarrotnew(grad,d);
// apply_Lreal(d,pd); a = 0.5*(dot(d,pd));
// DEBUG!!!
//dft_log("volfactor = %.6lf N1=%d N2=%d N3=%d\n",volfactor,N1,N2,N3);
#ifdef DFT_PROFILING
timerOn(31);
counterIncr(23);
#endif // DFT_PROFILING
//L(test,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
// Non-orthrhombic L
// Do dx*dx.
evalgrid(pd,NULLDVEC,zero);
if(lattice->GGT.m[0][0] != 0.0)
{
cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,0);
scalarmultcumnew(pd,(volfactor*(lattice->GGT.m[0][0])*(N1*N1)),
tempout);
}
// Do dx*dy.
if(lattice->GGT.m[0][1] != 0.0)
{
cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,1);
scalarmultcumnew(pd,(2.*volfactor*(lattice->GGT.m[0][1])*(N1*N2)),
tempout);
dft_log("got here\n");
}
// Do dx*dz.
if(lattice->GGT.m[0][2] != 0.0)
{
cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,0,2);
scalarmultcumnew(pd,(2.*volfactor*(lattice->GGT.m[0][2])*(N1*N3)),
tempout);
dft_log("got here\n");
}
// Do dy*dy.
if(lattice->GGT.m[1][1] != 0.0)
{
cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,1,1);
scalarmultcumnew(pd,(volfactor*(lattice->GGT.m[1][1])*(N2*N2)),
tempout);
}
// Do dy*dz.
if(lattice->GGT.m[1][2] != 0.0)
{
cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,1,2);
scalarmultcumnew(pd,(2.*volfactor*(lattice->GGT.m[1][2])*(N2*N3)),
tempout);
dft_log("got here\n");
}
// Do dz*dz.
if(lattice->GGT.m[2][2] != 0.0)
{
cD(tempout,d,O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,
helpgrid,workspace1,workspace2,2,2);
scalarmultcumnew(pd,(volfactor*(lattice->GGT.m[2][2])*(N3*N3)),
tempout);
}
#ifdef DFT_PROFILING
timerOff(31);
#endif // DFT_PROFILING
a = 0.5*gridcarrotnew(d,pd);
// dF/d(lambda) = 0 --> lambda_0 = -b/(2*a)
lambda = (-1.0*b)/(2.0*a);
//dft_log("%1.12lf\n",lambda);
// Set the current gradient to the updated gradient.
// Use pd = L(d).
//pd *= (0.5*lambda); grad += pd;
//scalarmultadd(grad,(0.5*lambda),pd);
scalarmultcumnew(grad,(0.5*lambda),pd);
// Store the search direction before we change it.
//pd = d;
copygridnew(pd,d);
// Update the output vector.
//d *= lambda; output += d;
//scalarmultadd(out,lambda,d);
scalarmultcumnew(out,lambda,d);
// grad now contains L(old_output + lambda*d) - input
// Let's see how close we are to convergence.
digits = (int) floor(-log(1.e-100+sqrt(gridcarrotnew(grad,grad)))/log(10.));
//dft_log("\niteration %d, digits = %d \n",iter,digits);
dft_log("%d ",digits); dft_log_flush();
if(digits >= convdigits)
anotherstep = 0; // We're done!
iter++;
#ifdef DFT_PROFILING
gettimeofday(&time2,NULL);
rtimepoi=1.*(time2.tv_sec-time1.tv_sec)+
(time2.tv_usec-time1.tv_usec)/1.e6;
rtimepoicum+=rtimepoi;
// dft_log("\nSingle and total iteration time: %.3f sec %.3f sec\n",
// rtimepoi,rtimepoicum);
#endif // DFT_PROFILING
// Let's bail out if we haven't converged in dieiter iterations.
if(iter > dieiter)
die("\ninvL failed to converge to %d digits in %d iterations!\n",
convdigits, dieiter);
}
//dft_log("\nTotal time spent in L (C only) = %.3f sec\n",cltime);
dft_log("\n");
PutRe(out,output);
// Output now contains invL(input).
// Free temporaries.
myfree(workspace1); myfree(workspace2);
killgridnew(out); killgridnew(in);
killgridnew(grad); killgridnew(d); killgridnew(pd);
killgridnew(tempout);
killgridnew(helpgrid);
#ifdef DFT_PROFILING
timerOff(32); // turn off invL timer
#endif // DFT_PROFILING
}
//ipd: this is a hack to matt's nonorthorhombic laplacian so that we can
//use different code for the orthorhombic cells
void
apply_invL(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
matrix3 l=input.basis_spec->lattice->GGT;
if((l.m[0][1] == 0.0) && (l.m[0][2] == 0.0) && (l.m[1][2] == 0.0)\
&& input.basis_spec->use_L_orthorhombic == TRUE )
apply_invL_ORTHORHOMBIC(input,output);
else
apply_invL_NORTHORHOMBIC(input,output);
}
void
apply_O(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
#ifdef DFT_PROFILING
timerOn(29);
counterIncr(24);
#endif // DFT_PROFILING
if(input.representation == REALSPACE)
dft_log("Trying to take O of realspace bundle.\n");
// Set the representation
output.representation=input.representation;
if(output.embedding!=input.embedding)
die("Cannot do apply_O(%s, %s)! Write me!\n",
input.getembedding(), output.getembedding());
Gridspec *gridspec;
switch(input.embedding){
case SPARSE:
gridspec = input.basis_spec->gridspec;
break;
case DENSE:
gridspec = input.basis_spec->gridspec_dense;
break;
default:
die("apply_L: Unknown input embedding %d\n",input.embedding);
}
// output.kvec=input.kvec;
double kx, ky, kz;
if(input.basis->qnum){ // hack it chargedensities don't have qnum
kx=input.basis->qnum->kvec.v[0];
ky=input.basis->qnum->kvec.v[1];
kz=input.basis->qnum->kvec.v[2];
}
else kx=ky=kz=0.;
// Declare workspace variables for operators.
double O1lr[3*(HFL_O1+1)], O1lf[3*(HFL_O1+1)], O1ls[3*(HFL_O1+1)];
double O2lr[3*(HFL_O2+1)], O2lf[3*(HFL_O2+1)], O2ls[3*(HFL_O2+1)];
real *workspace1, *workspace2;
// Allocate workspaces
workspace1= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace1", "apply_O");
workspace2= (real *) mymalloc(input.basis->lwork*sizeof(real),
"workspace2", "apply_O");
if (kx==0. && ky==0. && kz==0.) {
Grid *tempin = mkgridnew(gridspec,NULL);
Grid *tempout = mkgridnew(gridspec,NULL);
Grid *helpgrid = mkgridnew(gridspec,NULL);
GetRe(tempin,input);
zeroghostrec(tempin);
O(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
PutRe(tempout,output);
GetIm(tempin,input);
zeroghostrec(tempin);
O(tempout,tempin,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
PutIm(tempout,output);
killgridnew(tempin);
killgridnew(tempout);
killgridnew(helpgrid);
}
else {
// create expanded top level to hold the top level with
// proper wrapping when going to the neighbouring cells
struct Grid FgridinRe, FgridoutRe,FgridinIm, FgridoutIm;
struct Ivec xpnd = {HFL_O1, HFL_O1, HFL_O1};
struct Ivec rvrs = {-HFL_O1, -HFL_O1, -HFL_O1};
Grid *inre, *inim, *outre, *outim;
inre = mkgridnew(gridspec,NULL);
inim = mkgridnew(gridspec,NULL);
outre = mkgridnew(gridspec,NULL);
outim = mkgridnew(gridspec,NULL);
GetRe(inre,input);
GetIm(inim,input);
FgridinRe.level = 0;
FgridinRe.ifperiodic=non_periodic;
FgridinRe.org.x = inre->org.x - xpnd.x;
FgridinRe.org.y = inre->org.y - xpnd.y;
FgridinRe.org.z = inre->org.z - xpnd.z;
FgridinRe.dim.x = inre->dim.x + 2*xpnd.x;
FgridinRe.dim.y = inre->dim.y + 2*xpnd.y;
FgridinRe.dim.z = inre->dim.z + 2*xpnd.z;
FgridinRe.scale = inre->scale;
FgridinRe.parent = NULL; // it is the top level
FgridinRe.nxyz = FgridinRe.dim.x*FgridinRe.dim.y*FgridinRe.dim.z;
// FgridinRe.nxyzwholegrid = input.Re.Cgrid[0]->nxyzwholegrid -
// input.Re.Cgrid[0]->nxyz + FgridinRe.nxyz;
// do all of the above for FgridinIm,FgridoutRe, FgridoutIm
FgridinIm=FgridinRe;
FgridoutRe=FgridinRe;
FgridoutIm=FgridinRe;
// allocate the space for the data
FgridinRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridinRe", "apply_O");
FgridinIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridinIm", "apply_O");
FgridoutRe.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridoutRe", "apply_O");
FgridoutIm.dat= (real *) mymalloc(FgridinRe.nxyz*sizeof(real),
"FgridoutIm", "apply_O");
Grid *helpgrid = mkgridnew(gridspec,NULL);
// in order that Oabovenew works correctly helpgrid and ingrid have
// to be identical structures on levels below toplevel
// we must change the origin of helpgrid as well
if(helpgrid->child)
setnewparent(helpgrid->child,xpnd,helpgrid);
// input.Re.Cgrid[ig]->child child of FgridinRe
FgridinRe.child = inre->child;
FgridinRe.sibling = inre->sibling;
// the same for FgridinIm,FgridoutRe, FgridoutIm
FgridinIm.child = inim->child;
FgridinIm.sibling = inim->sibling;
FgridoutRe.child = outre->child;
FgridoutRe.sibling = outre->sibling;
FgridoutIm.child = outim->child;
FgridoutIm.sibling = outim->sibling;
// make FgridinRe parent of input.Re.Cgrid[ig]->child and it siblings
if(inre->child){
setnewparent(inre->child,xpnd,&FgridinRe);
setnewparent(inim->child,xpnd,&FgridinIm);
setnewparent(outre->child,xpnd,&FgridoutRe);
setnewparent(outim->child,xpnd,&FgridoutIm);
}
// copy the data from top level ingrid.Re/Im to FgridinRe/Im
Dvec oldk;
oldk.x = kx;
oldk.y = ky;
oldk.z = kz;
CopyBlochPad(&FgridinRe, &FgridinIm, inre, inim, oldk, xpnd);
zeroghostrec(&FgridinRe);
zeroghostrec(&FgridinIm);
O(&FgridoutRe,&FgridinRe,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
O(&FgridoutIm,&FgridinIm,
O1lf,O1lr,O1ls,O2lf,O2lr,O2ls,helpgrid,workspace1,workspace2);
//zeroghostrec(out) is done inside L()
// copy the data from FgridoutRe/Im top level back to
// output.Re/Im.Cgrid[ig]
CopyfromPadded(outre, &FgridoutRe, xpnd);
CopyfromPadded(outim, &FgridoutIm, xpnd);
// return the original parents of input.Re.Cgrid[ig] and
// output.Re.Cgrid[i];
if(inre->child){
setnewparent(inre->child,rvrs,inre);
setnewparent(inim->child,rvrs,inim);
setnewparent(outre->child,rvrs,outre);
setnewparent(outim->child,rvrs,outim);
}
// Put the grids back.
PutRe(outre,output);
PutIm(outim,output);
// free the space
myfree(FgridinRe.dat);
myfree(FgridinIm.dat);
myfree(FgridoutRe.dat);
myfree(FgridoutIm.dat);
// restore the origin of helpgrid as it was initially
setnewparent(helpgrid->child,rvrs,helpgrid);
// Free the Grid's.
killgridnew(inre); killgridnew(inim);
killgridnew(outre); killgridnew(outim);
killgridnew(helpgrid);
}
myfree(workspace1); myfree(workspace2);
#ifdef DFT_PROFILING
timerOff(29); // turn off O timer
#endif // DFT_PROFILING
}
void
apply_Obar(const WL_ComplexColumn &input, WL_ComplexColumn &output)
{
static WL_ComplexColumn s(input);
static int get_s = 1;
static real omega = 0;
// Only compute s once.
if(get_s){
// S = O(J(1))
WL_ComplexColumn ones(input.basis);
ones.init_scalarfield(*(input.basis), REALSPACE);
ones.setones();
apply_J(ones,ones);
apply_O(ones,s);
// Get the unit cell volumn.
omega = REAL(s^ones);
//dft_log("Unit cell volume from Obar = %1.12lf\n",omega);
get_s = 0;
}
// Obar = O - (1/vol)(s s^)
double volfactor = (REAL(s^input))/omega;
apply_O(input,output);
//dft_log("integral(n) = %1.12lf\n",volfactor*omega);
output -= volfactor*s;
return;
}
syntax highlighted by Code2HTML, v. 0.9.1