/*
DFT++ is a density functional package developed by the research group
of Professor Tomas Arias
Copyright 1996-2003 Sohrab Ismail-Beigi
This file is part of DFT++.
DFT++ is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DFT++ is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DFT++; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Please see the file CREDITS for a list of authors.
For academic users, we request that publications using results obtained with
this software reference
"New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).
and, if using the wavelet basis, further reference
"Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).
and
"Robust ab initio calculation of condensed matter: transparent convergence through
semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).
For your convenience, preprints of the above articles may be obtained from
http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/
/*
* Code by Sohrab Ismail-Beigi, Mar. 1997
*
* Non-local pseudopotentials, with multiple-projector extension of
* basic Kleinman-Bylander form.
*
*/
/* $Id: PW_nlpot.cpp,v 1.1.2.9 2003/05/29 18:54:03 ivan Exp $ */
#include "header.h"
void accumulate_Vnl(const ColumnBundle &Y, ColumnBundle &HspY, Everything &e)
{
int i, sp, lm;
// Nonlocal part of potential: sum(ions,l,m,...) { Vnl*Mnl*Vnl^Y }
for (sp=0; sp < e.ioninfo.nspecies; sp++)
for (lm=0; lm < e.ioninfo.species[sp].pot.nlm; lm++){
if (e.ioninfo.species[sp].pot.ngamma[lm] > 1) {
dft_log(DFT_SILENCE,
"\nMultiple-projectors: running slow Hsp()!\n");
/* this is the slow way where we go one atom at a time...
* the smarter way would be to somehow make a new class
* which is a block-diagonal matrix class (a string of matrix
* classes on the diagonal of a bigger one), where each
* diagonal is just Mnl below, and to define an
* block_diag_matrix*matrix (returning matrix) operator.
* Then we can do what we do with the Kleinman-Bylander
* below with minimal changes. */
ColumnBundle Vnl(e.ioninfo.species[sp].pot.ngamma[lm],
Y.col[0]->basis,"local");
/* reference */
ComplexMatrix &Mnl = e.ioninfo.species[sp].pot.M[lm];
for (i=0; i < e.ioninfo.species[sp].natoms; i++) {
Vnl_pseudo(sp,i,lm,&(e.ioninfo),Vnl);
HspY += Vnl*(Mnl*(Vnl^Y));
}
}
/* Kleinman-Bylander: bunch up all local potentials for
* the atoms of this species and state into a big ColumnBundle
* and work on them instead (should be faster due to ^ and *
* operators being block-multiplies, etc.) */
else {
// Vnl is created as distributed ColumnBundle.
// the dimension that's distributed is ioninfo.species[sp].natoms
ColumnBundle Vnl(e.ioninfo.species[sp].natoms,Y.col[0]->basis,
"distributed");
// Fill in Vnl with pseudopotential elements
Vnl_pseudo_fill_matrix(&(e.ioninfo), Vnl,sp,lm);
// Now use Vnl!
// we want to do: HspY += Vnl*(Mnl*(Vnl^Y));
scalar Mnl = e.ioninfo.species[sp].pot.M[lm](0,0);
do_ColumnBundle_matrix_mult(Vnl,Mnl*(Vnl^Y),HspY,1);
}
}
}
/*
* Calculates the matrix elements of the non-local pseudopotential
* (multiple-projector form) for the ion'th ion of species sp
* for atomic state lm at k-vector kvec and returs the result
* as a column bundle of ioninfo->species[sp].ngamma[lm] columns of
* length basis->nbasis. ngamma is the number of multiple projectors
* for the species sp and state lm.
*
* Letting q = k + G, the matrix we are computing is Vnl with entries:
*
* Vnl(q,gamma) = <q|V_gamma*phi_gamma>
*
* where |q> is a plane-wave state with <r|q> = exp(i*q*r)/sqrt(Vol)
* and V_gamma is the non-local potential for the gamma'th state
* and phi_gamma is the atomic wave-function for the gamma'th state.
*
* Now, V_gamma*phi_gamma(r) = V_l(|r|)*phi_l(|r|)*Ylm(rhat)
* (hat == unit vec.)
*
* and using the relation:
*
* exp(iq.r) = sum_{l,m} { 4pi(i^l)j_l(|q|*|r|)Ylm*(qhat)Ylm(rhat) }
*
* We arrivate at,
*
* Vnl(q,gamma) = Y_lm(qhat)*(4*pi)*f_l(|q|)/sqrt(Vol)
*
* where
*
* f_l(q) = integral{0,inf} { dr*r^2*V_l(r)*phi_l(r)*(i^l*j_l(|r|*|q|)) }
*
* We get f_l(q) from the internal tables stored in ioninfo via interpolation.
*
*/
void
Vnl_pseudo(int sp,int ion,int lm,
Ioninfo *ioninfo,
ColumnBundle &Vnl)
{
Basis *basis = Vnl.col[0]->basis;
QuantumNumber *qnum = basis->qnum;
Lattice *lattice = basis->basis_spec->lattice;
complex S,Ylm;
real dot,posx,posy,posz,kx,ky,kz,kplusGx,kplusGy,kplusGz;
real qx,qy,qz,q,invq,qhatx,qhaty,qhatz;
real flq=0.0,r,invdq=0.0;
real GGTxx,GGTyy,GGTzz,GGTxy,GGTxz,GGTyz;
real Vol,invsqrtVol;
real ym1,y0,y1,y2,a,b,c,x;
int ngamma,gamma,n,j,nmax=0,l,m;
matrix3 G;
/* pi et al. */
const real pi = M_PI;
const real twopi = (real)2.0*pi;
const real fourpi = (real)4.0*pi;
const real sixth = (real)1.0/(real)6.0;
/* Ylm normalization constants */
const real N00 = sqrt(1.0/(4.0*pi));
const real N10 = sqrt(3.0/(4.0*pi));
const real N11 = -sqrt(3.0/(8.0*pi));
const real N1m1 = sqrt(3.0/(8.0*pi));
const real N20 = sqrt(5.0/(16.0*pi));
const real N21 = -sqrt(15.0/(8.0*pi));
const real N2m1 = sqrt(15.0/(8.0*pi));
const real N22 = sqrt(15.0/(32.0*pi));
const real N2m2 = sqrt(15.0/(32.0*pi));
Speciesinfo *sp_info = &(ioninfo->species[sp]);
/* Vnl will contain the final result */
ngamma = sp_info->pot.ngamma[lm];
if (Vnl.tot_ncols != ngamma)
die("\nIn Vnl_pseudo(), ngamma != Vnl.ncols\n\n");
if (Vnl.col_length != basis->nbasis)
die("\nIn Vnl_pseudo(), nbasis != Vnl.col_length\n\n");
Vnl.zero_out();
/* Get ionic position, k-vector, cell volume, G, and GGT matrix entries */
posx = sp_info->atpos[ion].v[0];
posy = sp_info->atpos[ion].v[1];
posz = sp_info->atpos[ion].v[2];
kx = qnum->kvec.v[0];
ky = qnum->kvec.v[1];
kz = qnum->kvec.v[2];
G = lattice->G;
GGTxx = lattice->GGT.m[0][0];
GGTyy = lattice->GGT.m[1][1];
GGTzz = lattice->GGT.m[2][2];
GGTxy = lattice->GGT.m[0][1];
GGTxz = lattice->GGT.m[0][2];
GGTyz = lattice->GGT.m[1][2];
Vol = lattice->unit_cell_volume;
invsqrtVol = (real)1.0/sqrt(Vol);
/* Get l,m values for the potential */
l = sp_info->pot.l[lm];
m = sp_info->pot.m[lm];
if (l < 0 || l > 2)
die("l != {0,1,2} nonlocal pseudopotentials not supported yet!\n");
if (m < -l || m > l)
die("Nonlocal pseudopotential with |m| > l !?!?\n");
/* Calculate the non-local pseudopotential matrix entries */
for (gamma = 0; gamma < ngamma; gamma++){
invdq = (real)1.0/sp_info->pot.dq_nl[lm][gamma];
nmax = sp_info->pot.ngrid_nl[lm][gamma]-3;
for (n = 0; n < basis->nbasis; n++){
/* q = k+G: here q is in reciprocal lattice coordinates */
qx = kplusGx = kx + basis->Gx[n];
qy = kplusGy = ky + basis->Gy[n];
qz = kplusGz = kz + basis->Gz[n];
/* S = exp(-i*dot(q,pos)) */
dot = -twopi*(posx*qx + posy*qy + posz*qz);
S.x = cos(dot);
S.y = sin(dot);
q = sqrt( qx*qx*GGTxx + qy*qy*GGTyy + qz*qz*GGTzz +
2.0*(qx*qy*GGTxy + qx*qz*GGTxz + qy*qz*GGTyz) );
/* Interpolate flq using internal tables */
r = q*invdq;
j = (int)r;
if (j > nmax){
dft_log(DFT_SILENCE,
"G=[%d %d %d] q=[%g %g %g] k=[%g %g %g]\n",
basis->Gx[n],basis->Gy[n],basis->Gz[n],
qx,qy,qz,kx,ky,kz);
die("Nonlocal pseudopotential grid is too small!\n");
}
/* Do cubic interlopation on flq with j-1, j, j+1, and j+2
* points from internal tables. If j==0, we clearly can't
* real j-1. However f_l(-q) = (-1)^l*f_l(q) so we can
* fill in the j-1 entry with the j+1 entry times (-1)^l */
if (j == 0)
ym1 = (1-2*(l%2))*sp_info->pot.flq[lm][gamma][j+1];
else
ym1 = sp_info->pot.flq[lm][gamma][j-1];
y0 = sp_info->pot.flq[lm][gamma][j ];
y1 = sp_info->pot.flq[lm][gamma][j+1];
y2 = sp_info->pot.flq[lm][gamma][j+2];
a = sixth*(y2-ym1+3.0*(y0-y1));
b = 0.5*(y1+ym1-y0-y0);
c = y1-sixth*(y2+3.0*y0+ym1+ym1);
x = r-j;
flq = ((a*x+b)*x+c)*x+y0;
/* The line below is quadratic interploation: */
/* flq = y0 + 0.5*x*(y1-ym1+x*(y1+ym1-y0-y0)); */
/* If we're at q=0, then we must handle the case separately
* since Ylm and qhat are not defined for q=0. The cases are:
* if l==0, Ylm is const.
* if l>0, flq=0 anyways, so even though Ylm is undefined,
* it doesn't matter what it is since the final result is zero. */
if (q < (real)1.0e-13){
if (l==0) { Ylm.x = N00; Ylm.y = 0.0; }
else { Ylm.x = Ylm.y = 0.0; }
goto skipcalcYlm;
}
/* Replace the above if block by the one below to reporduce
* cgrad results. */
/* if (j == 0) */
/* { */
/* flq = y0 + (y1-y0)*x; */
/* if (l==0) { Ylm.x = N00; Ylm.y = 0.0; } */
/* else { Ylm.x = Ylm.y = 0.0; } */
/* goto skipcalcYlm; */
/* } */
/* Otherwise, we have to calculate Ylm:
* qhat is a unit vector in direction of q: here q is in "real"
* bohr^-1 units and q = (k+G)*G where (k+G) is a row-vector. */
qx = kplusGx*G.m[0][0] + kplusGy*G.m[1][0] + kplusGz*G.m[2][0];
qy = kplusGx*G.m[0][1] + kplusGy*G.m[1][1] + kplusGz*G.m[2][1];
qz = kplusGx*G.m[0][2] + kplusGy*G.m[1][2] + kplusGz*G.m[2][2];
invq = (real)1.0/q;
qhatx = qx*invq;
qhaty = qy*invq;
qhatz = qz*invq;
/* Calculate Ylm */
switch (l){
case 0:
Ylm.x = N00;
Ylm.y = 0.0; break;
case 1:
switch(m){
case 0:
Ylm.x = N10*qhatz;
Ylm.y = 0.0; break;
case 1:
Ylm.x = N11*qhatx;
Ylm.y = N11*qhaty; break;
case -1:
Ylm.x = N1m1*qhatx;
Ylm.y = -N1m1*qhaty; break;
default:
die("PW_nlpot -- invalid m: %d for l: %d\n", m, l);
} break;
case 2:
switch (m){
case 0:
Ylm.x = N20*(3.0*qhatz*qhatz-1.0);
Ylm.y = 0.0; break;
case 1:
Ylm.x = N21*(qhatz*qhatx);
Ylm.y = N21*(qhatz*qhaty); break;
case -1:
Ylm.x = N2m1*( qhatz*qhatx);
Ylm.y = N2m1*(-qhatz*qhaty); break;
case 2:
Ylm.x = N22*(qhatx*qhatx-qhaty*qhaty);
Ylm.y = N22*(2.0*qhatx*qhaty); break;
case -2:
Ylm.x = N2m2*(qhatx*qhatx-qhaty*qhaty);
Ylm.y = N2m2*(-2.0*qhatx*qhaty); break;
default:
die("PW_nlpot -- invalid m: %d for l: %d\n", m, l);
} break;
default:
die("nonlocal pseudopot: l:d ?!?!!\n",l);
}
skipcalcYlm: ;
/* Calculate Vnl */
Vnl.col[gamma]->data.d[n] = (S*Ylm);
Vnl.col[gamma]->data.d[n] *= fourpi*flq*invsqrtVol;
}
}
}
//
// This routine fills in a non-local pseudopot. matrix in the case
// of Kleinman-Bylander projectors, for columns start_col to
// start_col+n_cols_todo-1 of Vnl.
//
static void
Vnl_pseudo_fill_some(Ioninfo *ioninfo,
ColumnBundle &Vnl,
int sp,int lm,
int start_col,int n_cols_todo)
{
if (n_cols_todo == 0) return;
Basis *basis = Vnl.col[0]->basis;
// Temporary space to hold the pseudopot for one atom
ColumnBundle Vnloneatom(1,basis,"local");
// Fill in the columns of Vnl
int i,j;
for (i=start_col; i < start_col+n_cols_todo; i++) {
Vnl_pseudo(sp,i+Vnl.start_ncol,
lm,ioninfo,Vnloneatom);
for (j=0; j < basis->nbasis; j++)
Vnl.col[i]->data.d[j] = Vnloneatom.col[0]->data.d[j];
}
}
//
// A thread runs this routine. The routine figures out which part of
// Vnl this thread should fill out, and then does exactly that by
// calling the above routine.
//
#ifdef DFT_THREAD
static void *
Vnl_pseudo_thread(void *arg)
{
// Decode the inputs
dft_thread_data *data = (dft_thread_data *)arg;
Ioninfo *ioninfo = (Ioninfo *)data->p1;
ColumnBundle *Vnl = (ColumnBundle *)data->p2;
int start_col = data->start;
int n_cols_todo = data->n;
int sp = data->i1;
int lm = data->i2;
// Fill in the matrix
Vnl_pseudo_fill_some(ioninfo,*Vnl,sp,lm,start_col,n_cols_todo);
// Free up data-passing struct and exit
myfree(arg);
return NULL;
}
#endif
//
// This routine is just a coding-saver, called from the outside,
// that fills in Vnl with non-local pseudo-pot matrix elements.
//
void
Vnl_pseudo_fill_matrix(Ioninfo *ioninfo,ColumnBundle &Vnl,
int sp,int lm)
{
#ifdef DFT_THREAD
// Threads! Distribute the work of filling in Vnl
dft_call_threads(Vnl.my_ncols,
(void *)ioninfo,(void *)&Vnl,NULL,NULL,NULL,
sp,lm,0,0,0,0,
Vnl_pseudo_thread);
#else
// Serial: just fill Vnl with one call
Vnl_pseudo_fill_some(ioninfo,Vnl,sp,lm,0,Vnl.my_ncols);
#endif
}
/*
Textbook formulae from Liboff:
Y0,0 = 1/sqrt(4pi)
Y1,0 = sqrt(3/(4*pi))*cos(theta)
Y1,(+-)1 = (-+)sqrt(3/(8pi))*sin(theta)*exp((+-)i*phi)
Y2,0 = sqrt(5/(16*pi))*(3*cos(theta)^2 -1)
Y2,(+-)1 = (-+)sqrt(15/(8pi))(sin(theta)*cos(theta))*exp((+-)i*phi)
Y2,(+-)2 = sqrt(15/(32*pi))(sin(theta)^2)*exp((+-)2i*phi)
*/
/*
* This routine does what the above one does, and also fills in
* dVnlx/y/z with the derivatives of Vnl versus the position of the
* ion 'ion' of species 'sp' (lattice coords).
*/
void
dVnl_pseudo_datom_pos(int sp,int ion,int lm,
/* Basis *basis,
Lattice *lattice,
*/ Ioninfo *ioninfo,
ColumnBundle &Vnl,
ColumnBundle &dVnlx,
ColumnBundle &dVnly,
ColumnBundle &dVnlz )
{
Basis *basis = Vnl.col[0]->basis;
QuantumNumber *qnum = basis->qnum;
Lattice *lattice = basis->basis_spec->lattice;
complex S,Ylm,negtwopiiVnl;
real dot,posx,posy,posz,kx,ky,kz,kplusGx,kplusGy,kplusGz;
real qx,qy,qz,q,invq,qhatx,qhaty,qhatz;
real flq=0.0,r,invdq=0.0;
real GGTxx,GGTyy,GGTzz,GGTxy,GGTxz,GGTyz;
real Vol,invsqrtVol;
real ym1,y0,y1,y2,a,b,c,x;
int ngamma,gamma,n,j,nmax=0,l,m;
matrix3 G;
/* pi et al. */
const real pi = M_PI;
const real twopi = (real)2.0*pi;
const real fourpi = (real)4.0*pi;
const real sixth = (real)1.0/(real)6.0;
/* Ylm normalization constants */
const real N00 = sqrt(1.0/(4.0*pi));
const real N10 = sqrt(3.0/(4.0*pi));
const real N11 = -sqrt(3.0/(8.0*pi));
const real N1m1 = sqrt(3.0/(8.0*pi));
const real N20 = sqrt(5.0/(16.0*pi));
const real N21 = -sqrt(15.0/(8.0*pi));
const real N2m1 = sqrt(15.0/(8.0*pi));
const real N22 = sqrt(15.0/(32.0*pi));
const real N2m2 = sqrt(15.0/(32.0*pi));
Speciesinfo *sp_info = &(ioninfo->species[sp]);
/* Vnl will contain the final result */
ngamma = sp_info->pot.ngamma[lm];
if (Vnl.tot_ncols != ngamma)
die("\nIn Vnl_pseudo(), ngamma != Vnl.ncols\n\n");
if (Vnl.col_length != basis->nbasis)
die("\nIn Vnl_pseudo(), nbasis != Vnl.col_length\n\n");
Vnl.zero_out();
/* Get ionic position, k-vector, cell volume, G, and GGT matrix entries */
posx = sp_info->atpos[ion].v[0];
posy = sp_info->atpos[ion].v[1];
posz = sp_info->atpos[ion].v[2];
kx = qnum->kvec.v[0];
ky = qnum->kvec.v[1];
kz = qnum->kvec.v[2];
G = lattice->G;
GGTxx = lattice->GGT.m[0][0];
GGTyy = lattice->GGT.m[1][1];
GGTzz = lattice->GGT.m[2][2];
GGTxy = lattice->GGT.m[0][1];
GGTxz = lattice->GGT.m[0][2];
GGTyz = lattice->GGT.m[1][2];
Vol = lattice->unit_cell_volume;
invsqrtVol = (real)1.0/sqrt(Vol);
/* Get l,m values for the potential */
l = sp_info->pot.l[lm];
m = sp_info->pot.m[lm];
if (l < 0 || l > 2)
die("l != {0,1,2} nonlocal pseudopotentials not supported yet!\n");
if (m < -l || m > l)
die("Nonlocal pseudopotential with |m| > l !?!?\n");
/* Calculate the non-local pseudopotential matrix entries */
for (gamma = 0; gamma < ngamma; gamma++) {
invdq = (real)1.0/sp_info->pot.dq_nl[lm][gamma];
nmax = sp_info->pot.ngrid_nl[lm][gamma]-3;
for (n = 0; n < basis->nbasis; n++) {
/* q = k+G: here q is in reciprocal lattice coordinates */
qx = kplusGx = kx + basis->Gx[n];
qy = kplusGy = ky + basis->Gy[n];
qz = kplusGz = kz + basis->Gz[n];
/* S = exp(-i*dot(q,pos)) */
dot = -twopi*(posx*qx + posy*qy + posz*qz);
S.x = cos(dot);
S.y = sin(dot);
q = sqrt( qx*qx*GGTxx + qy*qy*GGTyy + qz*qz*GGTzz +
2.0*(qx*qy*GGTxy + qx*qz*GGTxz + qy*qz*GGTyz) );
/* Interpolate flq using internal tables */
r = q*invdq;
j = (int)r;
if (j > nmax) {
dft_log(DFT_SILENCE,
"G=[%d %d %d] q=[%g %g %g] k=[%g %g %g]\n",
basis->Gx[n],basis->Gy[n],basis->Gz[n],
qx,qy,qz,kx,ky,kz);
die("Nonlocal pseudopotential grid is too small!\n");
}
/* Do cubic interlopation on flq with j-1, j, j+1, and j+2
* points from internal tables. If j==0, we clearly can't
* real j-1. However f_l(-q) = (-1)^l*f_l(q) so we can
* fill in the j-1 entry with the j+1 entry times (-1)^l */
if (j == 0)
ym1 = (1-2*(l%2))*sp_info->pot.flq[lm][gamma][j+1];
else
ym1 = sp_info->pot.flq[lm][gamma][j-1];
y0 = sp_info->pot.flq[lm][gamma][j ];
y1 = sp_info->pot.flq[lm][gamma][j+1];
y2 = sp_info->pot.flq[lm][gamma][j+2];
a = sixth*(y2-ym1+3.0*(y0-y1));
b = 0.5*(y1+ym1-y0-y0);
c = y1-sixth*(y2+3.0*y0+ym1+ym1);
x = r-j;
flq = ((a*x+b)*x+c)*x+y0;
/* The line below is quadratic interploation: */
/* flq = y0 + 0.5*x*(y1-ym1+x*(y1+ym1-y0-y0)); */
/* If we're at q=0, then we must handle the case separately
* since Ylm and qhat are not defined for q=0. The cases are:
* if l==0, Ylm is const.
* if l>0, flq=0 anyways, so even though Ylm is undefined,
* it doesn't matter what it is since the final result is zero. */
if (q < (real)1.0e-13) {
if (l==0) { Ylm.x = N00; Ylm.y = 0.0; }
else { Ylm.x = Ylm.y = 0.0; }
goto skipcalcYlm;
}
/* Otherwise, we have to calculate Ylm:
* qhat is a unit vector in direction of q: here q is in "real"
* bohr^-1 units and q = (k+G)*G where (k+G) is a row-vector. */
qx = kplusGx*G.m[0][0] + kplusGy*G.m[1][0] + kplusGz*G.m[2][0];
qy = kplusGx*G.m[0][1] + kplusGy*G.m[1][1] + kplusGz*G.m[2][1];
qz = kplusGx*G.m[0][2] + kplusGy*G.m[1][2] + kplusGz*G.m[2][2];
invq = (real)1.0/q;
qhatx = qx*invq;
qhaty = qy*invq;
qhatz = qz*invq;
/* Calculate Ylm */
switch(l){
case 0:
Ylm.x = N00;
Ylm.y = 0.0; break;
case 1:
switch(m){
case 0:
Ylm.x = N10*qhatz;
Ylm.y = 0.0; break;
case 1:
Ylm.x = N11*qhatx;
Ylm.y = N11*qhaty; break;
case -1:
Ylm.x = N1m1*qhatx;
Ylm.y = -N1m1*qhaty; break;
default:
die("PW_atom_nlpot -- invalid m: %d for l: %d\n", m, l);
} break;
case 2:
switch(m){
case 0:
Ylm.x = N20*(3.0*qhatz*qhatz-1.0);
Ylm.y = 0.0; break;
case 1:
Ylm.x = N21*(qhatz*qhatx);
Ylm.y = N21*(qhatz*qhaty); break;
case -1:
Ylm.x = N2m1*( qhatz*qhatx);
Ylm.y = N2m1*(-qhatz*qhaty); break;
case 2:
Ylm.x = N22*(qhatx*qhatx-qhaty*qhaty);
Ylm.y = N22*(2.0*qhatx*qhaty); break;
case -2:
Ylm.x = N2m2*(qhatx*qhatx-qhaty*qhaty);
Ylm.y = N2m2*(-2.0*qhatx*qhaty); break;
default:
die("PW_atom_nlpot -- invalid m: %d for l: %d\n", m, l);
} break;
default:
die("nonlocal pseudopot: l: %d ?!?!!\n",l);
}
skipcalcYlm: ;
/* Calculate Vnl */
Vnl.col[gamma]->data.d[n] = (S*Ylm);
Vnl.col[gamma]->data.d[n] *= fourpi*flq*invsqrtVol;
/* dVnl(k+G)/dtau[j] = -2*pi*i*(k[j]+G[j])*Vnl(k+G) */
negtwopiiVnl.x = twopi*Vnl.col[gamma]->data.d[n].y;
negtwopiiVnl.y = -twopi*Vnl.col[gamma]->data.d[n].x;
dVnlx.col[gamma]->data.d[n] = kplusGx*negtwopiiVnl;
dVnly.col[gamma]->data.d[n] = kplusGy*negtwopiiVnl;
dVnlz.col[gamma]->data.d[n] = kplusGz*negtwopiiVnl;
}
}
}
//
// Fills in columns start_col to start_col+n_cols_todo-1
// of Vnl and dVx,y,z by calling above routine.
//
static void
dVnl_pseudo_datom_fill_some(Ioninfo *ioninfo,
ColumnBundle &Vnl,
ColumnBundle &dVx,
ColumnBundle &dVy,
ColumnBundle &dVz,
int sp,int lm,
int start_col,int n_cols_todo)
{
Basis *basis = Vnl.col[0]->basis;
// Temporary workspaces to hold results for each atom
ColumnBundle Vnloneatom(1,basis,"local");
ColumnBundle dVxoneatom(1,basis,"local");
ColumnBundle dVyoneatom(1,basis,"local");
ColumnBundle dVzoneatom(1,basis,"local");
// Fill in the columns!
for (int i=start_col; i < start_col+n_cols_todo; i++) {
int j;
dVnl_pseudo_datom_pos(sp,i+Vnl.start_ncol,lm,ioninfo,
Vnloneatom,dVxoneatom,dVyoneatom,dVzoneatom);
for (j=0; j < basis->nbasis; j++) {
Vnl.col[i]->data.d[j] = Vnloneatom.col[0]->data.d[j];
dVx.col[i]->data.d[j] = dVxoneatom.col[0]->data.d[j];
dVy.col[i]->data.d[j] = dVyoneatom.col[0]->data.d[j];
dVz.col[i]->data.d[j] = dVzoneatom.col[0]->data.d[j];
}
}
}
#ifdef DFT_THREAD
//
// This routine is what a thread runs: it calls the above filling
// routine with the parts appropriate for this thread to fill out
//
static void *
dVnl_pseudo_datom_thread(void *arg)
{
// Decode what's coming in
dft_thread_data *data = (dft_thread_data *)arg;
Ioninfo *ioninfo = (Ioninfo *)data->p1;
ColumnBundle **Varray = (ColumnBundle **)data->p2;
ColumnBundle *Vnl = Varray[0];
ColumnBundle *dVx = Varray[1];
ColumnBundle *dVy = Varray[2];
ColumnBundle *dVz = Varray[3];
int sp = data->i1;
int lm = data->i2;
int start_col = data->start;
int n_cols_todo = data->n;
// Fill in the parts that this thread should fill
dVnl_pseudo_datom_fill_some(ioninfo,*Vnl,*dVx,*dVy,*dVz,
sp,lm,start_col,n_cols_todo);
// Free up data-passing struct and exit thread
myfree(arg);
return NULL;
}
#endif
//
// This routine has Vnl and dVx,y,z filled in with appropriate
// matrix elements of non-local pseudopot. for Kleinman-Bylander case.
//
void
dVnl_pseudo_datom_fill(Ioninfo *ioninfo,
ColumnBundle &Vnl,
ColumnBundle &dVx,
ColumnBundle &dVy,
ColumnBundle &dVz,
int sp,int lm)
{
#ifdef DFT_THREAD
// Threads! Distribute the filling among threads
// We need to pack the addresses of Vnl and dVx/y/z since
// we can only pass 4 pointers in the current version of threads
ColumnBundle *(Varray[4]);
Varray[0] = &Vnl; Varray[1] = &dVx; Varray[2] = &dVy; Varray[3] = &dVz;
dft_call_threads(Vnl.my_ncols,
(void *)ioninfo,(void *)Varray,NULL, NULL, NULL,
sp,lm,0,0,0,0,
dVnl_pseudo_datom_thread);
#else
// Serial: fill them in with one call
dVnl_pseudo_datom_fill_some(ioninfo,Vnl,dVx,dVy,dVz,
sp,lm,0,Vnl.my_ncols);
#endif
}
syntax highlighted by Code2HTML, v. 0.9.1