/*
    DFT++ is a density functional package developed by the research group
    of Professor Tomas Arias

    Copyright 1996-2003 Sohrab Ismail-Beigi

    This file is part of DFT++.

    DFT++ is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DFT++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DFT++; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Please see the file CREDITS for a list of authors.

    For academic users, we request that publications using results obtained with
    this software reference

    "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
    and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).

    and, if using the wavelet basis, further reference

    "Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
    T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).

    and 

    "Robust ab initio calculation of condensed matter: transparent convergence through
    semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
    Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).

    For your convenience, preprints of the above articles may be obtained from
    http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/

/*
 * calcionicforces.c:   Sohrab Ismail-Beigi     May 6, 1997
 *                        
 * A set of routines to calculate the forces on the atoms
 *
 */

/* $Id: PW_ionicforces.cpp,v 1.1.2.7 2003/05/29 18:54:03 ivan Exp $ */

#include "header.h"

//
// This routine actually calculates the forces due to the local
// potential on atoms start_atom to start_atom+natoms_todo-1
//
static void
calc_ionic_forces_Vloc_some(int sp,int start_atom,int natoms_todo,
                            const CoeffSpaceScalarFieldColumn &Jn,
                            //Basis *basis,Lattice *lattice,
                            Ioninfo &ioninfo,vector3 *force_buf)
{
  CoeffSpaceScalarFieldColumn dVx(Jn);
  CoeffSpaceScalarFieldColumn dVy(Jn);
  CoeffSpaceScalarFieldColumn dVz(Jn);

  int iat;

  
  for (iat = start_atom; iat < start_atom + natoms_todo; iat++){
    dVlocpot_datom_pos(ioninfo,sp,iat,dVx,dVy,dVz);
    force_buf[iat].v[0] = ioninfo.species[sp].forces[iat].v[0] - 
      REAL(dVx^Jn);
    force_buf[iat].v[1] = ioninfo.species[sp].forces[iat].v[1] -
      REAL(dVy^Jn);
    force_buf[iat].v[2] = ioninfo.species[sp].forces[iat].v[2] -
      REAL(dVz^Jn);
  }
}

#ifdef DFT_THREAD
//
// This is the routine run by a thread for calculating forces.
// It just calls the routine above with the right indices.
//
static void *
calc_ionic_forces_Vloc_thread(void *arg)
{
  // Decode the incoming data
  dft_thread_data *data = (dft_thread_data *)arg;
  CoeffSpaceScalarFieldColumn *Jn = (CoeffSpaceScalarFieldColumn *)data->p1;
  Ioninfo *ioninfo = (Ioninfo *)data->p2;
  vector3 *force_buf = (vector3 *)data->p3;
  int sp = data->i1;
  int offset = data->i2;
  int start_atom = data->start + offset;
  int natoms_todo = data->n;

  // Call routine, free memory used for passing info, and exit
  calc_ionic_forces_Vloc_some(sp,start_atom,natoms_todo,*Jn,
                              *ioninfo,force_buf);
  myfree(arg);
  return NULL;
}
#endif

/*
 * This routine calculates the forces on the atoms due to the derivate 
 * of the local pseudopotential versus atomic coordinates.
 * 
 * The forces are accumulated into ioninfo->species[].forces[]
 *
 * The ionic force due to local potential is:
 *   f_sp_nat = - (  dV_loc/dR_sp_nat ^ (Jn_v) )
 *
 * This is the routine called from the outside...
 */
void
calc_ionic_forces_Vloc(Elecvars &elecvars,
                       Ioninfo &ioninfo)
{
  dft_log("\n--- calc_atomic_forces_Vloc() ---\n");
  
  /* Calculate J(n) */
  CoeffSpaceScalarFieldColumn Jn(elecvars.Vlocpot);
  apply_J(elecvars.n,Jn);
  
  /* loop over species */
  int sp;
  for (sp=0; sp < ioninfo.nspecies; sp++) {
    int natoms = ioninfo.species[sp].natoms;
    dft_log("species = %d  natoms = %d\n",sp,natoms);
    dft_log_flush();
    
#ifdef DFT_MPI
    // Buffer to hold temporary results which will be accumulated
    // across processors to get the final result
      vector3 *force_buf = (vector3 *)mymalloc(sizeof(vector3)*natoms,
					       "force_buf[]","Vloc_force");
      // zero it out
      int i;
      for (i=0; i < natoms; i++)
        force_buf[i].v[0] = force_buf[i].v[1] = force_buf[i].v[2] = 0.0;
#else
      // Otherwise the buffer just points to where the forces go
      vector3 *force_buf = ioninfo.species[sp].forces;
#endif
      
      // Calculate forces over atoms
      // We use distributed a column_bundle with natoms columns
      // and 1 bit of data each only to distribute the atom numbers
      // across the processors, so we can parallelize the force
      // computation in the MPI case.
      ColumnBundle distrib(1,natoms);
      
      // If this processor has some forces to calculate, then do it!
      if (distrib.my_ncols > 0) 
#ifdef DFT_THREAD
        // Running threads... farm out the work to the threads
        dft_call_threads(distrib.my_ncols,
                         (void *)&Jn,(void *)&ioninfo,
                         (void *)force_buf, NULL, NULL,
                         sp,distrib.start_ncol,0,0,0,0,
                         calc_ionic_forces_Vloc_thread);
#else	
      calc_ionic_forces_Vloc_some(sp,
                                  distrib.start_ncol,distrib.my_ncols,
                                  Jn,ioninfo,force_buf);
#endif
      
#ifdef DFT_MPI
      // Now, got to communicate the forces result to everybody.
      // In principle, need to do bunch of broadcast calls.
      // For now, use a Allreduce call. Simple but may not be the best choice.
#ifdef DFT_PROFILING
      timerOn(13);  // Turn on other MPI_Allreduce timer
#endif // DFT_PROFILING
      MPI_Allreduce ( force_buf, &(ioninfo.species[sp].forces[0]),
                      3*natoms, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
#ifdef DFT_PROFILING
      timerOff(13);  // Turn off other MPI_Allreduce timer
#endif // DFT_PROFILING
      
      // We're done with the buffer
      myfree(force_buf);
      
#endif // DFT_MPI
      
    } /* species (sp) loop */
  

  dft_log("\n");
  dft_log_flush();
  
}


/*
 * This routine calculates the forces on the atoms due to the derivate 
 * of the non-local pseudopotential versus atomic coordinates.
 * 
 * The forces are accumulated into ioninfo->species[].forces[]
 *
 */
void
calc_ionic_forces_Vnl(Elecinfo &elecinfo,
                      Elecvars &elecvars,
                      Ioninfo &ioninfo)
{
  int sp,lm,i,q;
//  ColumnBundle *C = elecvars->C;
//  diag_matrix *F = elecinfo->F;
//  real *w = elecinfo->w;
  
  dft_log("\n--- calc_atomic_forces_Vnl() ---\n");
  
  for (sp=0; sp < ioninfo.nspecies; sp++)
    for (lm=0; lm < ioninfo.species[sp].pot.nlm; lm++){
      
      dft_log("species = %d   nlm = %d   lm = %d   natoms = %d\n",
              sp,ioninfo.species[sp].pot.nlm,lm,
              ioninfo.species[sp].natoms);
      dft_log_flush();
      
      if (ioninfo.species[sp].pot.ngamma[lm] > 1){
        dft_log(DFT_SILENCE,
                "\nMultiple-projectors: running slow nl forces!\n");
        
        Matrix VdagC(ioninfo.species[sp].pot.ngamma[lm],elecinfo.nbands);
        Matrix dVdagC(ioninfo.species[sp].pot.ngamma[lm],elecinfo.nbands);
        Matrix &Mnl = ioninfo.species[sp].pot.M[lm]; /* reference */
	    
        for (q=0; q < elecinfo.nstates; q++) {
          BlochState &s= elecvars.states[q];
          ColumnBundle Vnl(ioninfo.species[sp].pot.ngamma[lm],
                           s.C.col[0]->basis,"local");
          ColumnBundle dVx(ioninfo.species[sp].pot.ngamma[lm],
                           s.C.col[0]->basis,"local");
          ColumnBundle dVy(ioninfo.species[sp].pot.ngamma[lm],
                           s.C.col[0]->basis,"local");
          ColumnBundle dVz(ioninfo.species[sp].pot.ngamma[lm],
                           s.C.col[0]->basis,"local");
	      
          for (i=0; i < ioninfo.species[sp].natoms; i++) {
            dVnl_pseudo_datom_pos(sp,i,lm, &ioninfo,
                                  Vnl,dVx,dVy,dVz);
            VdagC = Vnl^s.C;
            /* x-force */
            dVdagC = dVx^s.C;
            ioninfo.species[sp].forces[i].v[0] += \
              -2.0*REAL(s.w*trace(Mnl*dVdagC*s.F*herm_adjoint(VdagC)));
            /* y-force */
            dVdagC = dVy^s.C;
            ioninfo.species[sp].forces[i].v[1] += \
              -2.0*REAL(s.w*trace(Mnl*dVdagC*s.F*herm_adjoint(VdagC)));
            /* z-force */
            dVdagC = dVz^s.C;
            ioninfo.species[sp].forces[i].v[2] += \
              -2.0*REAL(s.w*trace(Mnl*dVdagC*s.F*herm_adjoint(VdagC)));
          }
        }
      }
      /* Kleinman-Bylander:  bunch up all local potentials for
       * the atoms of this species and state into a big column_bundle
       * and work on them instead (should be faster due to ^ and *
       * operators being block-multiplies, etc.).
       */
      else {
	    Matrix VdagC(ioninfo.species[sp].natoms,elecinfo.nbands);
	    Matrix dVdagC(ioninfo.species[sp].natoms,elecinfo.nbands);
	    scalar Mnl = ioninfo.species[sp].pot.M[lm](0,0);
	    diag_matrix fvector(ioninfo.species[sp].natoms);
        
	    /* To save us from repeatly allocate and deallocate
	     * column_bundles, allocate once for the largest basis
	     * and keep using it. */
/*	    int max_nbasis = 0;
	    for (q=0; q < elecinfo->nstates; q++) 
	      if (max_nbasis < basis[q].nbasis)
            max_nbasis = basis[q].nbasis;

	    // Vnl, dVx, dVy, dVz are created as distributed column_bundles.
	    // the dimension that's distributed is ioninfo->species[sp].natoms
	    column_bundle Vnl(ioninfo->species[sp].natoms,max_nbasis);
	    column_bundle dVx(ioninfo->species[sp].natoms,max_nbasis);
	    column_bundle dVy(ioninfo->species[sp].natoms,max_nbasis);
	    column_bundle dVz(ioninfo->species[sp].natoms,max_nbasis);
*/      

	    for (q=0; q < elecinfo.nstates; q++) {
/*
          // Manually adjust the dimension of column_bundles.
          Vnl.col_length = basis[q].nbasis;
          dVx.col_length = basis[q].nbasis;
          dVy.col_length = basis[q].nbasis;
          dVz.col_length = basis[q].nbasis;
          copy_innards_column_bundle(&(C[q]),&Vnl);
*/
          BlochState &s = elecvars.states[q];
          ColumnBundle Vnl(ioninfo.species[sp].natoms, s.C.col[0]->basis,
                           "distributed");
          ColumnBundle dVx(ioninfo.species[sp].natoms, s.C.col[0]->basis,
                           "distributed");
          ColumnBundle dVy(ioninfo.species[sp].natoms, s.C.col[0]->basis,
                           "distributed");
          ColumnBundle dVz(ioninfo.species[sp].natoms, s.C.col[0]->basis,
                           "distributed");
          
          // Fill in Vnl and dVx/y/z
          dVnl_pseudo_datom_fill(&ioninfo, Vnl,dVx,dVy,dVz,sp,lm);
          
          /* Now use Vnl and dVx/y/z to calculate forces: */
          VdagC = Vnl^s.C;

          /* x-forces */
          dVdagC = dVx^s.C;
          fvector = -2.0*s.w*Mnl*diag(dVdagC*s.F*herm_adjoint(VdagC));
          for (i=0; i < ioninfo.species[sp].natoms; i++)
            ioninfo.species[sp].forces[i].v[0] += REAL(fvector.c[i]);

          /* y-forces */
          dVdagC = dVy^s.C;
          fvector = -2.0*s.w*Mnl*diag(dVdagC*s.F*herm_adjoint(VdagC));
          for (i=0; i < ioninfo.species[sp].natoms; i++)
            ioninfo.species[sp].forces[i].v[1] += REAL(fvector.c[i]);

          /* z-forces */
          dVdagC = dVz^s.C;
          fvector = -2.0*s.w*Mnl*diag(dVdagC*s.F*herm_adjoint(VdagC));
          for (i=0; i < ioninfo.species[sp].natoms; i++)
            ioninfo.species[sp].forces[i].v[2] += REAL(fvector.c[i]);

        }
	  }
      
    }
  
  dft_log("\n");
  dft_log_flush();

}


//
// This routine does the actual Ewald force caluclation for atoms
// start_atom to start_atom+natoms_todo-1.
//
static void
calc_ionic_forces_Ewald_some(int sp,int start_atom,int natoms_todo,
			     Lattice *lattice,Ioninfo *ioninfo,
			     vector3 *force_buf)
{
  for (int iat=start_atom; iat < start_atom+natoms_todo; iat++)
    force_buf[iat] = ioninfo->species[sp].forces[iat] -
      dEwald_datom_pos(*ioninfo,*lattice,sp,iat);
}

#ifdef DFT_THREAD
//
// This is the routine run by a thread for calculating forces.
// It just calls the routine above with the right indices.
//
static void *
calc_ionic_forces_Ewald_thread(void *arg)
{
  // Decode the incoming data
  dft_thread_data *data = (dft_thread_data *)arg;
  Lattice *lattice = (Lattice *)data->p1;
  Ioninfo *ioninfo = (Ioninfo *)data->p2;
  vector3 *force_buf = (vector3 *)data->p3;
  int sp = data->i1;
  int offset = data->i2;
  int start_atom = data->start + offset;
  int natoms_todo = data->n;

  // Call routine, free memory used for passing info, and exit
  calc_ionic_forces_Ewald_some(sp,start_atom,natoms_todo,
			       lattice,ioninfo,force_buf);
  myfree(arg);
  return NULL;
}
#endif

/*
 * This routine calculates the forces on the atoms due to the derivate 
 * of the Ewald energy versus atomic positions.
 * 
 * The forces are accumulated into ioninfo->species[].forces[]
 *
 */
void
calc_ionic_forces_Ewald(Ioninfo *ioninfo, Lattice *lattice)
{
  int sp;

  dft_log("\n------ calc_ionic_forces_ewald() ------\n");

  /* loop over species */
  for (sp=0; sp < ioninfo->nspecies; sp++)
    {
      int natoms = ioninfo->species[sp].natoms;

      dft_log("species = %d   natoms = %d\n",sp,natoms);
      dft_log_flush();

#ifdef DFT_MPI
      // Buffer to hold temporary results which will be accumulated
      // across processors to get the final result
      vector3 *force_buf = (vector3 *)mymalloc(sizeof(vector3)*natoms,
					       "force_buf[]","Ewald_force");
      // zero it out
      int i;
      for (i=0; i < natoms; i++)
	force_buf[i].v[0] = force_buf[i].v[1] = force_buf[i].v[2] = 0.0;
#else
      // Otherwise the buffer just points to where the forces go
      vector3 *force_buf = ioninfo->species[sp].forces;
#endif

      // Calculate forces over atoms
      // We use distributed a column_bundle with natoms columns
      // and 1 bit of data each only to distribute the atom numbers
      // across the processors, so we can parallelize the force
      // computation in the MPI case.
      ColumnBundle distrib(1,natoms);

      // If this processor has work to do, do it
      if (distrib.my_ncols > 0) 
#ifdef DFT_THREAD
	dft_call_threads(distrib.my_ncols,
			 (void *)lattice,(void *)ioninfo,
                     (void *)force_buf,NULL, NULL,
			 sp,distrib.start_ncol,0,0,0,0,
			 calc_ionic_forces_Ewald_thread);
#else
	calc_ionic_forces_Ewald_some(sp,
				     distrib.start_ncol,distrib.my_ncols,
				     lattice,ioninfo,force_buf);
#endif

#ifdef  DFT_MPI
      // Now, got to communicate the forces result to everybody.
      // In principle, need to do bunch of broadcast calls.
      // For now, use a Allreduce call. Simple but may not be the best choice.
#ifdef DFT_PROFILING
      timerOn(13);  // Turn on other MPI_Allreduce timer
#endif // DFT_PROFILING
      MPI_Allreduce ( force_buf, &(ioninfo->species[sp].forces[0]),
		      3*natoms, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
#ifdef DFT_PROFILING
      timerOff(13);  // Turn off other MPI_Allreduce timer
#endif // DFT_PROFILING

      // We're done with the temporary buffer
      myfree(force_buf);

#endif // DFT_MPI

    }

  dft_log("\n");
  dft_log_flush();
}


/*
 * Calculates the forces on the ions (in lattice coordinates).
 * The three energy terms that must be differentialted are the
 * local pseudopot. energy, the non-local pseudopot energy, and the
 * Ewald energy.
 *
 */
void calc_ionic_forces(Everything &everything)
{

  Elecinfo &elecinfo = everything.elecinfo;
  Elecvars &elecvars = everything.elecvars;
  Ioninfo &ioninfo = everything.ioninfo;
//  Basis *basis = everything.basis;
  Lattice &lattice = everything.lattice;

  /* Zero out forces, copying them to storage area*/
  int sp,i;
  for (sp=0; sp < ioninfo.nspecies; sp++)
    for (i=0; i < ioninfo.species[sp].natoms; i++){
      ioninfo.species[sp].old_forces[i] = ioninfo.species[sp].forces[i];
      ioninfo.species[sp].forces[i] = 0.0;
    }
  
  /* local pseudopot contribution */
  calc_ionic_forces_Vloc(elecvars,ioninfo);

  /* non-local pseudopot contribution */
  calc_ionic_forces_Vnl(elecinfo,elecvars,ioninfo);
  
  /* Ewald contribution */
  calc_ionic_forces_Ewald(&ioninfo,&lattice);
  
  /* Symmetrize forces */
  ioninfo.symmetrize_force(everything.symm);
}


syntax highlighted by Code2HTML, v. 0.9.1