Merge branch 'develCG' into 'develop'

Refactoring of common code in QEHeat and CP's conjugate gradient routine See merge request QEF/q-e!1530
2021-08-22 19:34:01 +00:00 · 2021-08-22 19:34:01 +00:00 · c41432f842
parent 88666e647b aa35e27909
commit c41432f842
16 changed files with 461 additions and 143 deletions
--- a/CPV/src/cg_sub.f90
+++ b/CPV/src/cg_sub.f90
@ -12,7 +12,8 @@
      rhor, rhog, rhos, rhoc, ei1, ei2, ei3, sfac, fion, ema0bg, becdr, &
      lambdap, lambda, nlam, vpot, c0, cm, phi, dbec,l_cprestart  )

-!! please see https://journals.aps.org/rmp/abstract/10.1103/RevModPhys.64.1045
+!! please see https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.79.1337 (ensemble DFT)
+!! and        https://journals.aps.org/rmp/abstract/10.1103/RevModPhys.64.1045 (conjugate gradient)

      use kinds, only: dp
      use control_flags, only: tpre, iverbosity, tfor, tprnfor
@ -56,7 +57,7 @@
      USE cp_main_variables,        ONLY : idesc, drhor, drhog
      USE mp_global, ONLY:  me_image, my_image_id, nbgrp
      USE fft_base,  ONLY: dffts, dfftp
-
+      use wave_gauge, only: project_parallel_gauge_2 

 !
      implicit none
@ -940,33 +941,11 @@
 !if required project c0 on previous manifold of occupied states                                                                                    
 !NOT IMPLEMENTED YET FOR ENSEMBLE DFT AND NSPIN==2
 !NOT IMPLEMENTED FOR US PSEUDOPOTENTIALS
-
-           lambda_repl=0.d0
-            do i = 1, nss
-               do j = 1, nss
-                  ii = i + istart - 1
-                  jj = j + istart - 1
-                  do ig = 1, ngw
-                     lambda_repl( i, j ) = lambda_repl( i, j ) + &
-                          2.d0 * DBLE( CONJG( c0old( ig, ii ) ) * c0( ig, jj) )
-                  enddo
-                  if( gstart == 2 ) then
-                     lambda_repl( i, j ) = lambda_repl( i, j ) - &
-                          DBLE( CONJG( c0old( 1, ii ) ) * c0( 1, jj ) )
-                  endif
-               enddo
-            enddo
-            CALL mp_sum( lambda_repl, intra_bgrp_comm )
-
-
            cm(:,:)=c0(:,:)
-            c0=(0.d0,0.d0)
-            do i=1,nss
-               do j=1,nss
-                  c0(1:ngw,i)=c0(1:ngw,i)+lambda_repl(i,j)*cm(1:ngw,j)
-               enddo
-            enddo
-          
+            call project_parallel_gauge_2(c0old, cm, c0, &
+                                          nss, ngw, ngw,gstart) 
+
+
            call calbec (nbsp,betae,c0,bec)
            CALL gram_bgrp( betae, bec, nkb, c0, ngw )
            call calbec(nbsp, betae,c0,bec)
--- a/CPV/src/make.depend
+++ b/CPV/src/make.depend
@ -29,6 +29,7 @@ cg_sub.o : ../../Modules/ions_base.o
 cg_sub.o : ../../Modules/kind.o
 cg_sub.o : ../../Modules/mp_global.o
 cg_sub.o : ../../Modules/recvec.o
+cg_sub.o : ../../Modules/wave_gauge.o
 cg_sub.o : ../../UtilXlib/mp.o
 cg_sub.o : ../../upflib/uspp.o
 cg_sub.o : cg.o
--- a/CPV/src/newd.f90
+++ b/CPV/src/newd.f90
@ -37,7 +37,7 @@
      IMPLICIT NONE
 ! input
      REAL(DP), INTENT(IN) ::  rhovan(nhm*(nhm+1)/2,nat,nspin)
-      REAL(DP)  vr(dfftp%nnr,nspin)
+      REAL(DP), INTENT(IN) ::  vr(dfftp%nnr,nspin)
      LOGICAL, INTENT(IN) :: tprint
 ! output
      REAL(DP)  fion(3,nat)
--- a/Modules/CMakeLists.txt
+++ b/Modules/CMakeLists.txt
@ -93,6 +93,7 @@ set(src_modules
    wyckoff.f90 
    wypos.f90 
    zvscal.f90 
+    wave_parallel.f90
    # list of subroutines and functions (not modules) previously found in flib/
    atom_weight.f90 
    capital.f90 
--- a/Modules/Makefile
+++ b/Modules/Makefile
@ -102,7 +102,8 @@ fox_init_module.o \
 xsf.o \
 wyckoff.o \
 wypos.o \
-zvscal.o 
+zvscal.o \
+wave_gauge.o

 # list of subroutines and functions (not modules) previously found in flib/

--- a/Modules/make.depend
+++ b/Modules/make.depend
@ -395,6 +395,9 @@ wannier_gw.o : io_global.o
 wannier_gw.o : kind.o
 wannier_gw.o : recvec.o
 wannier_new.o : kind.o
+wave_gauge.o : ../UtilXlib/mp.o
+wave_gauge.o : kind.o
+wave_gauge.o : mp_pools.o
 wavefunctions.o : kind.o
 wavefunctions_gpu.o : wavefunctions.o
 wgauss.o : constants.o
--- a/Modules/wave_gauge.f90
+++ b/Modules/wave_gauge.f90
@ -0,0 +1,155 @@
+!
+! Copyright (C) 2001-2021 Quantum ESPRESSO group
+! This file is distributed under the terms of the
+! GNU General Public License. See the file `License'
+! in the root directory of the present distribution,
+! or http://www.gnu.org/copyleft/gpl.txt .
+!
+!
+!-----------------------------------------------------------------------
+
+MODULE wave_gauge
+! this module contains routines that are used to compute the numerical
+! derivative of the wavefunctions fixing a particular gauge
+
+   USE kinds, ONLY: DP
+   implicit none
+
+
+   contains
+
+   subroutine compute_dot_evc_parallel_gauge(t_minus, t_zero, t_plus, dot_evc, nbnd, npw, npwx, gstart)
+      ! compute the numerical derivative using the wavefunctions computed at t-dt/2, t, t+dt/2 
+      ! in the parallel transport gauge
+
+      integer, intent(in) :: nbnd, npw, npwx, gstart
+      COMPLEX(DP), intent(inout) ::  dot_evc(:, :)
+      complex(dp), intent(in) :: t_minus(:,:), t_zero(:,:), t_plus(:,:)
+
+      dot_evc=(0.0_dp, 0.0_dp)
+      call project_parallel_gauge(t_minus, t_zero, t_plus, dot_evc, nbnd, npw, npwx, gstart, 1.0_dp,&
+                                  .true., .true.)
+      
+   end subroutine
+
+   subroutine project_parallel_gauge_2(t_minus, t_zero, t_zero_proj , nbnd, npw, npwx, gstart)
+      ! project t_zero over the manifold of t_minus in the parallel transport gauge.
+      ! Do not project over the not occupied manifold
+
+      integer, intent(in) :: nbnd, npw, npwx, gstart
+      COMPLEX(DP), intent(inout) ::   t_zero_proj(:,:)
+      complex(dp), intent(in) :: t_minus(:, :),  t_zero(:,:)
+      complex(dp), allocatable :: dummy(:,:)
+
+      t_zero_proj=0.0_dp
+      call project_parallel_gauge(t_zero, t_minus, dummy, t_zero_proj,  nbnd, npw, npwx,&
+                                  gstart, -1.0_dp, .false., .false. )
+
+   end subroutine 
+
+   subroutine project_parallel_gauge(t_minus, t_zero, t_plus, dot_evc, nbnd, npw, npwx, gstart, &
+                                     factor, use_t_plus, project_conduction)
+      !! implemented only in the plain dft norm conserving pseudopotential case 
+      !! let P be the projector over the occupied manifold space:
+      !! P = \sum_v |c_v><c_v|
+      !! then (no surprise)
+      !! |c> = P |c>
+      !! doing a time derivative:
+      !! \dot |c> = \dot P |c> + P \dot |c>
+      !! the parallel transport gauge fixes P \dot |c> to zero (the derivative
+      !! has no components over the occupied space). \dot P can be computed numerically
+      !! using wavefunctions that possibly have different gauges.
+      !! Then, to make sure that there are no components over the occupied space,
+      !! a projector over the conduction band is added if project_conduction is .true.:
+      !!
+      !!  (1-P) \dot P |c> 
+      !! 
+      !! t_minus, t_zero, t_plus are the wavefunctions. t_minus and t_plus are separated by a time dt
+      !! t_zero is computed in the middle. The gauge of the result is the same of t_zero.
+      !! This works also if you put t_zero = t_minus or t_zero = t_plus (be careful to the gauge!).
+      !! This routine adds \dot |c>*dt*factor (the derivative multiplied by dt multiplied by the specified factor)
+      !! to the output array dot_evc with the gauge of t_zero. Gauges of t_minus and t_plus should not enter the
+      !! result and can be arbitrary. If use_t_plus is false, t_plus = t_zero is assumed and some cpu time
+      !! is saved. If project_conduction is true the (1-P) projector is computed, and this involves an additional
+      !! matrix-matrix product with dimensions nbnd x nbnd
+      use mp, only: mp_sum
+      USE mp_pools, ONLY: intra_pool_comm
+
+      integer, intent(in) :: nbnd, npw, npwx, gstart
+      COMPLEX(DP), intent(inout) ::  dot_evc(:, :)
+      complex(dp), intent(in) :: t_minus(:,:), t_zero(:,:), t_plus(:,:)
+      real(dp), intent(in) :: factor
+      logical, intent(in) :: use_t_plus, project_conduction
+
+      real(DP), allocatable :: sa(:,:), ssa(:,:), sb(:,:), ssb(:,:)
+      complex(dp) :: tmp
+      integer :: ibnd, jbnd, ig
+
+
+      allocate(sb(nbnd, nbnd))
+! \dot P |c> = 
+! computed sa_ij = < t_plus_i, t_zero_j >, sb_ij = < t_minus_i, t_zero_j >
+! remove contribution at G=0 due to c(-g)*=c(g), and only half vector is stored
+! (gamma trick)
+! sa is the identity if 2 points are used (t_plus==t_zero)
+      if (use_t_plus) then
+         allocate(sa(nbnd, nbnd))
+         call dgemm('T', 'N', nbnd, nbnd, 2*npw, 2.d0, t_plus, 2*npwx, t_zero, 2*npwx, 0.d0, sa, nbnd)
+      end if
+      call dgemm('T', 'N', nbnd, nbnd, 2*npw, 2.d0, t_minus, 2*npwx, t_zero, 2*npwx, 0.d0, sb, nbnd)
+      if (gstart == 2) then
+         do ibnd = 1, nbnd
+            do jbnd = 1, nbnd
+               if (use_t_plus) &
+                  sa(ibnd, jbnd) = sa(ibnd, jbnd) - dble(conjg(t_plus(1, ibnd))*t_zero(1, jbnd))
+               sb(ibnd, jbnd) = sb(ibnd, jbnd) - dble(conjg(t_minus(1, ibnd))*t_zero(1, jbnd))
+            end do
+         end do
+      end if
+      call mp_sum(sb, intra_pool_comm)
+      if (use_t_plus) &
+         call mp_sum(sa, intra_pool_comm)
+
+      ! compute scalar products that appear due to the ( 1 - P ) projector over
+      ! the empty bands, that are 
+      ! ssa = sa ^ 2 and ssb = sb ^ 2. The gauge cancels out here
+      if (project_conduction) then
+         allocate(ssb(nbnd, nbnd))
+         if (use_t_plus) then
+             allocate(ssa(nbnd, nbnd))
+             call dgemm('T', 'N', nbnd, nbnd, nbnd, 1.d0, sa, nbnd, sa, nbnd, 0.d0, ssa, nbnd)
+         end if
+         call dgemm('T', 'N', nbnd, nbnd, nbnd, 1.d0, sb, nbnd, sb, nbnd, 0.d0, ssb, nbnd)
+      end if
+      ! compute final projection
+      do ibnd = 1, nbnd
+         do jbnd = 1, nbnd
+            do ig = 1, npw
+                tmp=0.0_dp
+                tmp = tmp - t_minus(ig, jbnd)*sb(jbnd, ibnd)
+                if (use_t_plus) then
+                   tmp = tmp + t_plus(ig, jbnd)*sa(jbnd, ibnd)
+                endif
+                if (project_conduction) then
+                   if (use_t_plus)&
+                       tmp = tmp - t_zero(ig, jbnd)*ssa(ibnd, jbnd)
+                   tmp = tmp  + t_zero(ig, jbnd)*ssb(ibnd, jbnd)
+                endif
+
+                dot_evc(ig, ibnd) = dot_evc(ig, ibnd) + factor*tmp
+            end do
+         end do
+      end do
+      if (project_conduction) then
+         deallocate(ssb)
+         if (use_t_plus)&
+             deallocate(ssa)
+      end if
+      if (use_t_plus)&
+          deallocate(sa)
+      deallocate(sb)
+
+   end subroutine
+
+
+end module
--- a/QEHeat/Doc/INPUT_ALL_CURRENTS.def
+++ b/QEHeat/Doc/INPUT_ALL_CURRENTS.def
@ -301,6 +301,29 @@ input_description -distribution {Quantum Espresso} -package QEHeat -program all_
                 You may want to specify startingwfc = 'random' in the ELECTRONS namelist.
               }
       }
+       var n_workers -type INTEGER {
+           default { 0 }
+           info {
+                The calculation over all the trajectory is splitted in @ref n_workers chunks. Then to run the code over all
+                the trajectory you must run @ref n_workers input files each one with a different @ref worker_id,
+                from 0 to @ref n_workers - 1 . Those inputs can run at the same time in the same folder. The @ref worker_id
+                will be appended to the outdir folder and to the @ref file_output input variables, so you can safely run all
+                the inputs in the same directory at the same time.
+           }
+       }
+       var worker_id -type INTEGER {
+           default { 0 }
+           info {
+                See @ref n_workers variable
+           }
+       }
+       var continue_not_converged -type LOGICAL {
+           default { .false. }
+           info {
+                If it is not possible to find a ground state for a given frame of the trajectory, go to the next one.
+                You will not find this step in the output file(s).
+           }
+       }

    }

--- a/QEHeat/Doc/INPUT_ALL_CURRENTS.html
+++ b/QEHeat/Doc/INPUT_ALL_CURRENTS.html
@ -58,7 +58,7 @@
   <p><a href="#idm3">INTRODUCTION</a></p>
   <p><a href="#idm35">&amp;ENERGY_CURRENT</a></p>
 <blockquote>
-<a href="#idm36">delta_t</a> | <a href="#idm39">file_output</a> | <a href="#idm46">trajdir</a> | <a href="#idm52">vel_input_units</a> | <a href="#idm58">eta</a> | <a href="#idm61">n_max</a> | <a href="#idm64">first_step</a> | <a href="#idm68">last_step</a> | <a href="#idm72">step_mul</a> | <a href="#idm77">step_rem</a> | <a href="#idm82">ethr_small_step</a> | <a href="#idm86">ethr_big_step</a> | <a href="#idm89">restart</a> | <a href="#idm94">subtract_cm_vel</a> | <a href="#idm97">add_i_current_b</a> | <a href="#idm100">save_dvpsi</a> | <a href="#idm103">re_init_wfc_1</a> | <a href="#idm106">re_init_wfc_2</a> | <a href="#idm110">re_init_wfc_3</a> | <a href="#idm113">three_point_derivative</a> | <a href="#idm118">n_repeat_every_step</a>
+<a href="#idm36">delta_t</a> | <a href="#idm39">file_output</a> | <a href="#idm46">trajdir</a> | <a href="#idm52">vel_input_units</a> | <a href="#idm58">eta</a> | <a href="#idm61">n_max</a> | <a href="#idm64">first_step</a> | <a href="#idm68">last_step</a> | <a href="#idm72">step_mul</a> | <a href="#idm77">step_rem</a> | <a href="#idm82">ethr_small_step</a> | <a href="#idm86">ethr_big_step</a> | <a href="#idm89">restart</a> | <a href="#idm94">subtract_cm_vel</a> | <a href="#idm97">add_i_current_b</a> | <a href="#idm100">save_dvpsi</a> | <a href="#idm103">re_init_wfc_1</a> | <a href="#idm106">re_init_wfc_2</a> | <a href="#idm110">re_init_wfc_3</a> | <a href="#idm113">three_point_derivative</a> | <a href="#idm118">n_repeat_every_step</a> | <a href="#idm123">n_workers</a> | <a href="#idm132">worker_id</a> | <a href="#idm136">continue_not_converged</a>
 </blockquote>
 </blockquote>
 </blockquote>
@ -550,6 +550,56 @@ is written with some statistics. Note that if you don't specify at least <a href
 You may want to specify startingwfc = 'random' in the ELECTRONS namelist.
         </pre></blockquote></td></tr>
 </table>
+<div align="right" style="margin-bottom: 5;">[<a href="#__top__">Back to Top</a>]</div>
+      <a name="idm123"></a><a name="n_workers"></a><table width="100%" style="border-color:   #b5b500; border-style: solid; border-width: 2; margin-bottom: 10; table-layout: auto; background-color: #FFFFFF;">
+<tr>
+<th align="left" valign="top" width="20%" style="background: #ffff99; padding: 2 2 2 10; ">n_workers</th>
+<td style="text-align: left; vertical-align: top; background: #ffffc3; padding: 2 2 2 5; ">INTEGER</td>
+</tr>
+<tr>
+<td style="text-align: right; vertical-align: top; background: #ffffc3; padding: 2 10 2 10; "><i>Default:</i></td>
+<td style="text-align: left;  vertical-align: top; background: #fff3d9; padding: 2 2 2 5; "> 0
+         </td>
+</tr>
+<tr><td align="left" valign="top" colspan="2"><blockquote><pre style="margin-bottom: -1em;">
+The calculation over all the trajectory is splitted in <a href="#n_workers">n_workers</a> chunks. Then to run the code over all
+the trajectory you must run <a href="#n_workers">n_workers</a> input files each one with a different <a href="#worker_id">worker_id</a>,
+from 0 to <a href="#n_workers">n_workers</a> - 1 . Those inputs can run at the same time in the same folder. The <a href="#worker_id">worker_id</a>
+will be appended to the outdir folder and to the <a href="#file_output">file_output</a> input variables, so you can safely run all
+the inputs in the same directory at the same time.
+         </pre></blockquote></td></tr>
+</table>
+<div align="right" style="margin-bottom: 5;">[<a href="#__top__">Back to Top</a>]</div>
+      <a name="idm132"></a><a name="worker_id"></a><table width="100%" style="border-color:   #b5b500; border-style: solid; border-width: 2; margin-bottom: 10; table-layout: auto; background-color: #FFFFFF;">
+<tr>
+<th align="left" valign="top" width="20%" style="background: #ffff99; padding: 2 2 2 10; ">worker_id</th>
+<td style="text-align: left; vertical-align: top; background: #ffffc3; padding: 2 2 2 5; ">INTEGER</td>
+</tr>
+<tr>
+<td style="text-align: right; vertical-align: top; background: #ffffc3; padding: 2 10 2 10; "><i>Default:</i></td>
+<td style="text-align: left;  vertical-align: top; background: #fff3d9; padding: 2 2 2 5; "> 0
+         </td>
+</tr>
+<tr><td align="left" valign="top" colspan="2"><blockquote><pre style="margin-bottom: -1em;">
+See <a href="#n_workers">n_workers</a> variable
+         </pre></blockquote></td></tr>
+</table>
+<div align="right" style="margin-bottom: 5;">[<a href="#__top__">Back to Top</a>]</div>
+      <a name="idm136"></a><a name="continue_not_converged"></a><table width="100%" style="border-color:   #b5b500; border-style: solid; border-width: 2; margin-bottom: 10; table-layout: auto; background-color: #FFFFFF;">
+<tr>
+<th align="left" valign="top" width="20%" style="background: #ffff99; padding: 2 2 2 10; ">continue_not_converged</th>
+<td style="text-align: left; vertical-align: top; background: #ffffc3; padding: 2 2 2 5; ">LOGICAL</td>
+</tr>
+<tr>
+<td style="text-align: right; vertical-align: top; background: #ffffc3; padding: 2 10 2 10; "><i>Default:</i></td>
+<td style="text-align: left;  vertical-align: top; background: #fff3d9; padding: 2 2 2 5; "> .false.
+         </td>
+</tr>
+<tr><td align="left" valign="top" colspan="2"><blockquote><pre style="margin-bottom: -1em;">
+If it is not possible to find a ground state for a given frame of the trajectory, go to the next one.
+You will not find this step in the output file(s).
+         </pre></blockquote></td></tr>
+</table>
 <div align="right" style="margin-bottom: 5;">[<a href="#__top__">Back to Top</a>]</div>
   </td></tr></tbody></table></td></tr>
 </table>
--- a/QEHeat/Doc/INPUT_ALL_CURRENTS.txt
+++ b/QEHeat/Doc/INPUT_ALL_CURRENTS.txt
@ -341,6 +341,35 @@ NAMELIST: &ENERGY_CURRENT
                   You may want to specify startingwfc = 'random' in the ELECTRONS namelist.
   +--------------------------------------------------------------------
   
+   +--------------------------------------------------------------------
+   Variable:       n_workers
+   
+   Type:           INTEGER
+   Default:        0
+   Description:    The calculation over all the trajectory is splitted in "n_workers" chunks. Then to run the code over all
+                   the trajectory you must run "n_workers" input files each one with a different "worker_id",
+                   from 0 to "n_workers" - 1 . Those inputs can run at the same time in the same folder. The "worker_id"
+                   will be appended to the outdir folder and to the "file_output" input variables, so you can safely run all
+                   the inputs in the same directory at the same time.
+   +--------------------------------------------------------------------
+   
+   +--------------------------------------------------------------------
+   Variable:       worker_id
+   
+   Type:           INTEGER
+   Default:        0
+   Description:    See "n_workers" variable
+   +--------------------------------------------------------------------
+   
+   +--------------------------------------------------------------------
+   Variable:       continue_not_converged
+   
+   Type:           LOGICAL
+   Default:        .false.
+   Description:    If it is not possible to find a ground state for a given frame of the trajectory, go to the next one.
+                   You will not find this step in the output file(s).
+   +--------------------------------------------------------------------
+   
 ===END OF NAMELIST======================================================


--- a/QEHeat/Doc/INPUT_ALL_CURRENTS.xml
+++ b/QEHeat/Doc/INPUT_ALL_CURRENTS.xml
@ -314,5 +314,31 @@ is written with some statistics. Note that if you don&apos;t specify at least <r
 You may want to specify startingwfc = &apos;random&apos; in the ELECTRONS namelist.
         </info>
      </var>
+      <var name="n_workers" type="INTEGER" >
+         <default> 0
+         </default>
+         <info>
+The calculation over all the trajectory is splitted in <ref>n_workers</ref> chunks. Then to run the code over all
+the trajectory you must run <ref>n_workers</ref> input files each one with a different <ref>worker_id</ref>,
+from 0 to <ref>n_workers</ref> - 1 . Those inputs can run at the same time in the same folder. The <ref>worker_id</ref>
+will be appended to the outdir folder and to the <ref>file_output</ref> input variables, so you can safely run all
+the inputs in the same directory at the same time.
+         </info>
+      </var>
+      <var name="worker_id" type="INTEGER" >
+         <default> 0
+         </default>
+         <info>
+See <ref>n_workers</ref> variable
+         </info>
+      </var>
+      <var name="continue_not_converged" type="LOGICAL" >
+         <default> .false.
+         </default>
+         <info>
+If it is not possible to find a ground state for a given frame of the trajectory, go to the next one.
+You will not find this step in the output file(s).
+         </info>
+      </var>
   </namelist>
 </input_description>
--- a/QEHeat/examples/README.md
+++ b/QEHeat/examples/README.md
@ -1,45 +1,39 @@
 # INSTALLATION OVERVIEW
-###
-#### Before running the examples, you need to be sure that QEHeat is installed.
-#### To compile, we suggest the following procedure. As usual in a QuantumEspresso installation, enter the distribution folder and run autoconf :
-####
-#### > ./configure
-#### 
-#### Than from the distribution folder:
-####
-#### >  make all_currents
-####
-#### This should produce in a single shot the executables pw.x and all_currents.x, the executable for QEHeat, in the respective src and bin folders.
-####
-#### TROUBLESHOOT: 
-#### If problems occurs, we suggest to enter the main folder of the distribution and compile only pw.x independently:
-####
-#### >  make clean
-#### >  make veryclean
-#### >  ./configure
-#### >  make pw
-####
-#### This way one can recognize if the problem comes from compiling QEHeat or the standard QuantumEspresso distribution.
-####
-~                                          
+
+ Before running the examples, you need to be sure that QEHeat is installed.
+ To compile, we suggest the following procedure. As usual in a QuantumEspresso installation, enter the distribution folder and run autoconf :
+
+```
+ > ./configure
+```
+
+ Than from the distribution folder:
+
+```
+ >  make all_currents
+```
+
+ This will produce the executable `all_currents.x`, the executable for QEHeat, in the respective src and bin folders.
+

 # EXAMPLES
-### 
-#### See also ../../Doc/INPUT_ALL_CURRENTS.html for a description of the inputs.
-#### To run an example, enter in the respective trajectory and execute the script run_example.sh. Modify it if necessary.
-#### In general, running a Qeheat calculation just needs the execution of the command: all_currents.x -in input_energycurrent , 
-#### after the input file has been prepared.
-####
-#### Each example comes with a reference folder where the output files can be compared with the ones produced by a new installation/run.
-#### Pseudopotentials can be downloaded from http://www.quantum-simulation.org/potentials/sg15_oncv/
-#### For the examples, the following pseudos were used:
-#### H_HSCV_PBE-1.0.upf,  O_HSCV_PBE-1.0.upf,  O_ONCV_PBE-1.0.upf,  Si_ONCV_PBE-1.1.upf 
-#### , which should be present in a folder examples/pseudo
-####
-#### Example 1 and 2 need a parallel installation to finish in a reasonable time. Example 1 was run in the reference calculation on 4 cores and Example 2 on 12. 
-####
-#### Example 3 can be easily run on a single core (serial) installation. Note that Example 3 requires the program cp.x to be installed.
-#### If this is not the case, you can enter the distribution folder and run "make cp"  
+
+ See also `../Doc/INPUT_ALL_CURRENTS.html` for a description of the inputs.
+ To run an example, enter in the respective trajectory and execute the script `run_example.sh`. Modify it if necessary.
+ In general, running a Qeheat calculation just needs the execution of the command: 
+  `all_currents.x -in input_energycurrent` , 
+ after the input file has been prepared.
+
+ Each example comes with a reference folder where the output files can be compared with the ones produced by a new installation/run.
+ Pseudopotentials can be downloaded from [http://www.quantum-simulation.org/potentials/sg15_oncv/]()
+ For the examples, the following pseudos were used:
+ H_HSCV_PBE-1.0.upf,  O_HSCV_PBE-1.0.upf,  O_ONCV_PBE-1.0.upf,  Si_ONCV_PBE-1.1.upf 
+ , which should be present in the folder `pseudo`
+
+ Example 1 and 2 need a parallel installation to finish in a reasonable time. Example 1 was run in the reference calculation on 4 cores and Example 2 on 12. 
+
+ Example 3 can be easily run on a single core (serial) installation. Note that Example 3 requires the program `cp.x` to be installed, for example with `make cp`
+ from the main distribution folder.



@ -73,7 +67,7 @@ Only file_output.dat needs to be used to evaluate the thermal conductivity coeff
 - `file_output` : this reports the total energy current divided in individual components, if a more specific analysis is needed. This file is mainly thought for development purposes.

 `file_output.dat` comes with a header specifying the output units. The same units are used in the more detailed current decomposion given in `file_output`.
- See also the description ../../Doc/INPUT_ALL_CURRENTS.html 
+ See also the description ../Doc/INPUT_ALL_CURRENTS.html 



@ -124,7 +118,7 @@ This example requires the program cp.x to be installed. If this is not the case,

 Note that the trajectory produced by cp.x will be probably different due to the stochasticity inherent in the Car-Parrinello molecular dynamics simulation. For exact comparison 
 with a novel installation one can substitute `trajdir='reference/traj/cp'` and comment in the run_example_water script the call to cp.x. This way the files produced by `file_output`
-should be comparable with the reference. 
+should be comparable with the reference, up to numerical noise that is always present in the finite difference derivative with non perfectly converged wavefunctions.



--- a/QEHeat/src/all_currents.f90
+++ b/QEHeat/src/all_currents.f90
@ -13,8 +13,9 @@
 ! implements Marcolongo, Umari, and Baroni, Nat. Phys. 12, 80 (2016)
 ! details of the original implementation are described in
 !
-! Marcolongo, Bertossa, Tisi and Baroni
-! https://arxiv.org/abs/2104.06383 (2021)
+! Marcolongo, Bertossa, Tisi and Baroni, Computer Physics Communications, 269, 108090 (2021)
+! https://doi.org/10.1016/j.cpc.2021.108090
+! https://arxiv.org/abs/2104.06383
 !-----------------------------------------------------------------------

 program all_currents
@ -38,9 +39,11 @@ program all_currents
   use wavefunctions, only: evc
   use wvfct, only: nbnd, npwx, npw
   use kinds, only: dp
+
   ! utilities to read the cp.x produced trajectory
   use cpv_traj, only: cpv_trajectory, &
                       cpv_trajectory_initialize, cpv_trajectory_deallocate
+
   use dynamics_module, only: vel
   use ions_base, ONLY: tau, tau_format, nat
   USE control_flags, ONLY: ethr
@ -55,8 +58,11 @@ program all_currents
   USE read_input, ONLY: read_input_file
   USE command_line_options, ONLY: input_file_, command_line, ndiag_, nimage_
   USE check_stop, ONLY: check_stop_init
+
 !from ../Modules/read_input.f90
   USE read_namelists_module, ONLY: read_namelists
+
+   use input_parameters, only : outdir
   USE read_cards_module, ONLY: read_cards
   use averages, only: online_average, online_average_init

@ -72,7 +78,6 @@ program all_currents
   use wvfct, ONLY: g2kin, et
   use fft_base, only: dffts
   use atom, only: rgrid
-
   ! testing only!!!
   use test_h_psi, only: init_test

@ -102,7 +107,9 @@ program all_currents
   CHARACTER(len=256) :: file_output, trajdir = ''
   type(online_average) :: ave_cur
   real(kind=DP) ::delta_t, ethr_small_step, ethr_big_step
-   integer :: first_step, last_step, step_mul, step_rem, n_repeat_every_step
+   integer :: first_step, last_step, step_mul, step_rem, n_workers, worker_id, &
+              n_repeat_every_step, n_digit, &
+              first_s_chunk, last_s_chunk, steps_per_chunk
   logical :: restart ! if true try to read last calculated step from output and set first_step
   logical :: subtract_cm_vel ! if true do velocity renormalization
   logical :: re_init_wfc_1 = .false., re_init_wfc_2 = .false. ! initialize again evc before scf step number 1 or 2
@ -112,9 +119,9 @@ program all_currents
   ! note: i_current_b is proportional to the ionic velocities. In principle is not needed to calculate the thermal
   ! conductivity since it does not influence the final result. It is implemented only for a cubic cell.

-   character(len=256) :: vel_input_units = 'PW'
+   character(len=256) :: vel_input_units = 'PW', worker_id_char, format_string
   logical :: ec_test, hpsi_test ! activates tests for debugging purposes
-
+   logical :: continue_not_converged ! don't stop the calculation if a step does not converge
   !from ../PW/src/pwscf.f90
   include 'laxlib.fh'

@ -127,12 +134,19 @@ program all_currents
   CALL environment_start('QEHeat')
   call start_clock('all_currents')
   IF (ionode) THEN
-      write (*,*) 'This code implements Marcolongo, A., Umari, P. and Baroni, S'
-      write (*,*) ' Nature Phys 12, 80-84 (2016). https://doi.org/10.1038/nphys3509'
      write (*,*) ''
-      write (*,*) 'The details of the implementation are described in'
-      write (*,*) ' Marcolongo, Bertossa, Tisi, Baroni,'
-      write (*,*) ' https://arxiv.org/abs/2104.06383 (2021)'
+      write (*,*) '============================================================'
+      write (*,*) '============================================================'
+      write (*,*) ' This code implements Marcolongo, A., Umari, P. and Baroni, S'
+      write (*,*) '  Nature Phys 12, 80-84 (2016). https://doi.org/10.1038/nphys3509'
+      write (*,*) ''
+      write (*,*) ' The details of the implementation are described in'
+      write (*,*) '  Marcolongo, Bertossa, Tisi and Baroni'
+      write (*,*) '  Computer Physics Communications, 269, 108090 (2021)'
+      write (*,*) '  https://doi.org/10.1016/j.cpc.2021.108090'
+      write (*,*) '  https://arxiv.org/abs/2104.06383'
+      write (*,*) '============================================================'
+      write (*,*) '============================================================'
      write (*,*) ''
      CALL input_from_file()
      ! all_currents input
@ -145,16 +159,45 @@ program all_currents
                                       step_rem, ec_test, add_i_current_b, &
                                       save_dvpsi, re_init_wfc_1, re_init_wfc_2, &
                                       re_init_wfc_3, three_point_derivative, &
-                                       n_repeat_every_step, hpsi_test)
+                                       n_repeat_every_step, hpsi_test, &
+                                       n_workers, worker_id, &
+                                       continue_not_converged)
+     !if the problem is parallelized simply by running many times the code over the same trajectory
+     !with a different starting and ending timestep you can use the n_worker and the worker_id variables
+     !
+     ! if n_workers > 0, append worker_id to file_output
+     ! set first/last step accordingly
+     ! append worker_id to outdir
+     if (n_workers>0 ) then
+        if (worker_id >= n_workers .or. worker_id<0) then
+           call errore ('all_currents', 'worker_id must be one of 0, 1, ..., n_workers-1')
+        end if
+        n_digit = floor(log10(real(n_workers+1)))
+        write (format_string, '(A2,I1,A1)') "(I",n_digit, ")"
+
+        write (worker_id_char, format_string) worker_id
+        file_output=trim(file_output) // '.'//trim(worker_id_char)
+        ! calculate first step / last step for the chunk
+        steps_per_chunk = (last_step-first_step + 1)/n_workers
+        if (steps_per_chunk < 0) call errore('all_currents', 'last_step must be greater than first_step',1)
+        if (steps_per_chunk == 0 ) then
+           steps_per_chunk = 1
+           write(*,*) 'WARNING: n_workers is too high: some chunks will have no work' 
+        end if
+        first_s_chunk = first_step + steps_per_chunk*worker_id
+        if (worker_id < n_workers - 1 ) then
+           last_s_chunk = first_step + steps_per_chunk*(worker_id+1)
+        else
+           last_s_chunk = last_step
+        end if
+
+        write (*,*) 'This worker has steps from ', first_s_chunk, ' to ', last_s_chunk
+        first_step=first_s_chunk
+        last_step=last_s_chunk - 1
+
+     end if

   endif
-   ! PW input
-   call read_namelists('PW', 5)
-   call read_cards('PW', 5)
-
-   call check_input()
-
-   call mp_barrier(intra_pool_comm)
   call bcast_all_current_namelist( &
      delta_t, &
      file_output, trajdir, vel_input_units, &
@ -164,7 +207,21 @@ program all_currents
      step_rem, ec_test, add_i_current_b, &
      save_dvpsi, re_init_wfc_1, re_init_wfc_2, &
      re_init_wfc_3, three_point_derivative, &
-      n_repeat_every_step, hpsi_test)
+      n_repeat_every_step, hpsi_test, &
+      n_workers, worker_id, &
+      continue_not_converged)
+   ! PW input
+   call read_namelists('PW', 5)
+   if (n_workers>0 ) then
+      CALL mp_bcast(worker_id_char, ionode_id, world_comm)
+      outdir = trim(outdir) //trim(worker_id_char)
+   endif
+   
+   call read_cards('PW', 5)
+
+   call check_input()
+
+   call mp_barrier(intra_pool_comm)
   if (vel_input_units == 'CP') then ! atomic units of cp are different
      vel_factor = 2.0_dp
      if (ionode) &
@ -274,8 +331,12 @@ program all_currents
            call sum_band()
         end if
         ethr = ethr_big_step
-         call run_pwscf(exit_status)
-         if (exit_status /= 0) goto 100 !shutdown everything and exit
+         call run_electrons(exit_status, continue_not_converged)
+         if (exit_status == 2 .and. continue_not_converged) then 
+              continue
+         else if (exit_status /= 0) then
+             goto 100 !shutdown everything and exit
+         end if
         !save evc, tau and vel for t-dt
         call scf_result_set_from_global_variables(scf_all%t_minus)
         if (three_point_derivative) then
@ -285,9 +346,13 @@ program all_currents
               call sum_band()
            end if
            ethr = ethr_small_step
-            call run_pwscf(exit_status)
+            call run_electrons(exit_status, continue_not_converged)
            !evc_due = evc
-            if (exit_status /= 0) goto 100 !shutdown everything and exit
+            if (exit_status == 2 .and. continue_not_converged) then 
+               continue
+            else if (exit_status /= 0) then
+               goto 100 !shutdown everything and exit
+            end if
            !save evc, tau and vel for t
            call scf_result_set_from_global_variables(scf_all%t_zero)
            if (hpsi_test) &
@ -311,8 +376,12 @@ program all_currents
            call sum_band()
         end if
         ethr = ethr_small_step
-         call run_pwscf(exit_status)
-         if (exit_status /= 0) goto 100 !shutdown everything and exit
+         call run_electrons(exit_status, continue_not_converged)
+         if (exit_status == 2 .and. continue_not_converged) then 
+              continue
+         else if (exit_status /= 0) then 
+              goto 100 !shutdown everything and exit
+         end if
         !save evc, tau and vel for t+dt
         call scf_result_set_from_global_variables(scf_all%t_plus)

@ -476,7 +545,9 @@ contains
                                          step_rem, ec_test, add_i_current_b, &
                                          save_dvpsi, re_init_wfc_1, re_init_wfc_2, &
                                          re_init_wfc_3, three_point_derivative, &
-                                          n_repeat_every_step, hpsi_test)
+                                          n_repeat_every_step, hpsi_test, &
+                                          n_workers, worker_id, &
+                                          continue_not_converged)
      use io_global, ONLY: stdout, ionode, ionode_id
      implicit none
      integer, intent(in) :: iunit
@ -484,7 +555,8 @@ contains
      logical, intent(inout) :: save_dvpsi
      CHARACTER(len=256), intent(inout) :: file_output, trajdir
      real(kind=DP), intent(inout) ::delta_t, ethr_small_step, ethr_big_step
-      integer, intent(inout) :: first_step, last_step, step_mul, step_rem, n_repeat_every_step
+      integer, intent(inout) :: first_step, last_step, step_mul, step_rem, n_repeat_every_step, &
+                                n_workers, worker_id
      logical, intent(inout) :: restart
      logical, intent(inout) :: subtract_cm_vel
      logical, intent(inout) :: re_init_wfc_1, re_init_wfc_2
@ -494,7 +566,7 @@ contains
      character(len=256), intent(inout) :: vel_input_units
      logical, intent(inout) :: ec_test, hpsi_test ! activates tests for debugging purposes
      integer, intent(out) :: n_max
-
+      logical, intent(inout) :: continue_not_converged
      integer :: ios

      NAMELIST /energy_current/ delta_t, &
@ -505,7 +577,8 @@ contains
         step_rem, ec_test, add_i_current_b, &
         save_dvpsi, re_init_wfc_1, re_init_wfc_2, &
         re_init_wfc_3, three_point_derivative, &
-         n_repeat_every_step, hpsi_test
+         n_repeat_every_step, hpsi_test, &
+         n_workers, worker_id, continue_not_converged
      !
      !   set default values for variables in namelist
      !
@ -532,6 +605,9 @@ contains
      vel_input_units = 'PW'
      n_repeat_every_step = 1
      hpsi_test = .false.
+      n_workers = 0
+      worker_id = 0
+      continue_not_converged = .false.
      READ (iunit, energy_current, IOSTAT=ios)
      IF (ios /= 0) CALL errore('main', 'reading energy_current namelist', ABS(ios))

@ -546,7 +622,9 @@ contains
      step_rem, ec_test, add_i_current_b, &
      save_dvpsi, re_init_wfc_1, re_init_wfc_2, &
      re_init_wfc_3, three_point_derivative, &
-      n_repeat_every_step, hpsi_test)
+      n_repeat_every_step, hpsi_test, &
+      n_workers, worker_id, &
+      continue_not_converged)
      use io_global, ONLY: stdout, ionode, ionode_id
      use mp_world, ONLY: mpime, world_comm
      use mp, ONLY: mp_bcast
@ -555,7 +633,8 @@ contains
      logical, intent(inout) :: save_dvpsi
      CHARACTER(len=256), intent(inout) :: file_output, trajdir
      real(kind=DP), intent(inout) ::delta_t, ethr_small_step, ethr_big_step
-      integer, intent(inout) :: first_step, last_step, step_mul, step_rem, n_repeat_every_step
+      integer, intent(inout) :: first_step, last_step, step_mul, step_rem, n_repeat_every_step, &
+                                n_workers, worker_id
      logical, intent(inout) :: restart
      logical, intent(inout) :: subtract_cm_vel
      logical, intent(inout) :: re_init_wfc_1, re_init_wfc_2
@ -565,6 +644,7 @@ contains
      character(len=256), intent(inout) :: vel_input_units
      logical, intent(inout) :: ec_test, hpsi_test
      integer, intent(out) :: n_max
+      logical, intent(inout) :: continue_not_converged
      CALL mp_bcast(trajdir, ionode_id, world_comm)
      CALL mp_bcast(delta_t, ionode_id, world_comm)
      CALL mp_bcast(eta, ionode_id, world_comm)
@ -587,7 +667,10 @@ contains
      CALL mp_bcast(three_point_derivative, ionode_id, world_comm)
      CALL mp_bcast(n_repeat_every_step, ionode_id, world_comm)
      CALL mp_bcast(hpsi_test, ionode_id, world_comm)
-
+      CALL mp_bcast(n_workers, ionode_id, world_comm)
+      CALL mp_bcast(worker_id, ionode_id, world_comm)
+      CALL mp_bcast(vel_input_units, ionode_id, world_comm)
+      CALL mp_bcast(continue_not_converged, ionode_id, world_comm)
   end subroutine

   subroutine set_first_step_restart(restart, file_output, first_step)
@ -647,12 +730,13 @@ contains
      end if
   end subroutine

-   subroutine run_pwscf(exit_status)
+   subroutine run_electrons(exit_status, continue_not_converged)
      USE control_flags, ONLY: conv_elec, gamma_only, ethr, lscf, treinit_gvecs
      USE check_stop, ONLY: check_stop_init, check_stop_now
      USE qexsd_module, ONLY: qexsd_set_status
      implicit none
      INTEGER, INTENT(OUT) :: exit_status
+      logical, intent(in) :: continue_not_converged
      exit_status = 0
      call start_clock('PWSCF')
      IF (.NOT. lscf) THEN
@ -661,9 +745,10 @@ contains
         CALL electrons()
      END IF
      call stop_clock('PWSCF')
-      IF (check_stop_now() .OR. .NOT. conv_elec) THEN
+      IF (.NOT. conv_elec) exit_status = 2
+      IF (check_stop_now() .OR. & 
+             ( (.NOT. conv_elec ) .and. ( .not. continue_not_converged)) ) THEN
         IF (check_stop_now()) exit_status = 255
-         IF (.NOT. conv_elec) exit_status = 2
         CALL qexsd_set_status(exit_status)
         CALL punch('config')
         RETURN
--- a/QEHeat/src/kohn_sham_mod.f90
+++ b/QEHeat/src/kohn_sham_mod.f90
@ -25,6 +25,8 @@ contains

   end subroutine

+
+
   subroutine current_kohn_sham(J, J_a, J_b, J_el, dt, scf_all, &
                                dvpsi_save, save_dvpsi, &
                                nbnd, npw, npwx, dffts, evc, g, ngm, gstart, &
@ -43,6 +45,7 @@ contains
      use compute_charge_mod, only: compute_charge
      use project_mod, only: project
      use extrapolation, only: update_pot
+      use wave_gauge, only: compute_dot_evc_parallel_gauge

      use test_h_psi, only: test

@ -99,42 +102,10 @@ contains
                      (xk(3, 1) + g(3, igk_k(ig, 1)))**2)*tpiba2
      end do

-      sa = 0.d0
-      sb = 0.d0

-! computed sb = < evc_due, evc_uno >, sa = <evc_uno, evc_uno> remove contribution at G=0
-! sa is a multiple of the identity if 2 points are used (t_plus==t_zero)
-      call dgemm('T', 'N', nbnd, nbnd, 2*npw, 2.d0, scf_all%t_plus%evc, 2*npwx, scf_all%t_zero%evc, 2*npwx, 0.d0, sa, nbnd)
-      call dgemm('T', 'N', nbnd, nbnd, 2*npw, 2.d0, scf_all%t_minus%evc, 2*npwx, scf_all%t_zero%evc, 2*npwx, 0.d0, sb, nbnd)
-      if (gstart == 2) then
-         do ibnd = 1, nbnd
-            do jbnd = 1, nbnd
-               sa(ibnd, jbnd) = sa(ibnd, jbnd) - dble(conjg(scf_all%t_plus%evc(1, ibnd))*scf_all%t_zero%evc(1, jbnd))
-               sb(ibnd, jbnd) = sb(ibnd, jbnd) - dble(conjg(scf_all%t_minus%evc(1, ibnd))*scf_all%t_zero%evc(1, jbnd))
-            end do
-         end do
-      end if
-      call mp_sum(sa, intra_pool_comm)
-      call mp_sum(sb, intra_pool_comm)
-! computes phi_v^c_punto using <s,s>
+      call compute_dot_evc_parallel_gauge(scf_all%t_minus%evc, scf_all%t_zero%evc, scf_all%t_plus%evc,&
+                                    evp, nbnd, npw, npwx, gstart)

-      call dgemm('T', 'N', nbnd, nbnd, nbnd, 1.d0, sa, nbnd, sa, nbnd, 0.d0, ssa, nbnd)
-      call dgemm('T', 'N', nbnd, nbnd, nbnd, 1.d0, sb, nbnd, sb, nbnd, 0.d0, ssb, nbnd)
-
-      evp = 0.d0
-      do ibnd = 1, nbnd
-         do jbnd = 1, nbnd
-            do ig = 1, npw
-
-               evp(ig, ibnd) = evp(ig, ibnd) + scf_all%t_plus%evc(ig, jbnd)*sa(jbnd, ibnd)
-               evp(ig, ibnd) = evp(ig, ibnd) - scf_all%t_zero%evc(ig, jbnd)*ssa(ibnd, jbnd)
-
-               evp(ig, ibnd) = evp(ig, ibnd) - scf_all%t_minus%evc(ig, jbnd)*sb(jbnd, ibnd)
-               evp(ig, ibnd) = evp(ig, ibnd) + scf_all%t_zero%evc(ig, jbnd)*ssb(ibnd, jbnd)
-
-            end do
-         end do
-      end do
      evp(:, :) = evp(:, :)/dt

 !   end if
--- a/QEHeat/src/make.depend
+++ b/QEHeat/src/make.depend
@ -90,6 +90,7 @@ kohn_sham_mod.o : ../../Modules/fft_base.o
 kohn_sham_mod.o : ../../Modules/io_global.o
 kohn_sham_mod.o : ../../Modules/kind.o
 kohn_sham_mod.o : ../../Modules/mp_pools.o
+kohn_sham_mod.o : ../../Modules/wave_gauge.o
 kohn_sham_mod.o : ../../PW/src/pwcom.o
 kohn_sham_mod.o : ../../PW/src/update_pot.o
 kohn_sham_mod.o : ../../UtilXlib/mp.o
--- a/QEHeat/src/zero_mod.f90
+++ b/QEHeat/src/zero_mod.f90
@ -124,7 +124,6 @@ contains
 !auxiliary variables
      integer ::err, ir, ieta
      real(DP) ::R
-      real(DP), allocatable :: values(:)
      real(DP), allocatable   :: charge(:)
      complex(DP), allocatable   :: u_g(:, :) !ngm,a
      complex(DP), allocatable ::charge_g(:)