From 25b02439aa699badbad09d9eeb2bb318bdf3bbc1 Mon Sep 17 00:00:00 2001
From: cavazzon <cavazzon@c92efa57-630b-4861-b058-cf58834340f0>
Date: Tue, 18 Apr 2006 07:33:11 +0000
Subject: [PATCH] - fix for NLCC contribution to forces and stress - more BGL
 porting - clean-ups

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@3020 c92efa57-630b-4861-b058-cf58834340f0
---
 CPV/cg_sub.f90            |   2 +-
 CPV/chargedensity.f90     | 172 +++++++++++++++++++---------------
 CPV/cplib.f90             | 188 +++++++++++++++-----------------------
 CPV/cpr.f90               |   2 +-
 CPV/cprsub.f90            |  56 +++++++-----
 CPV/exch_corr.f90         | 105 +++++++++++----------
 CPV/inner_loop.f90        |   2 +-
 CPV/move_electrons.f90    |   6 --
 CPV/nl_base.f90           |  17 ++--
 CPV/nlcc.f90              |  10 +-
 CPV/ortho_base.f90        |   8 +-
 CPV/pseudopot.f90         |  56 +++++++-----
 CPV/runcp.f90             | 184 +++++++++++++++++++++++++++++++++++--
 CPV/smcp.f90              |  16 +---
 CPV/stress.f90            |  42 +++++----
 CPV/wf.f90                |  45 +++------
 Modules/control_flags.f90 |   3 +-
 Modules/read_upf.f90      |   5 +
 PW/stres_loc.f90          |   3 +-
 19 files changed, 534 insertions(+), 388 deletions(-)

diff --git a/CPV/cg_sub.f90 b/CPV/cg_sub.f90
index 03819e667..709d7127c 100644
--- a/CPV/cg_sub.f90
+++ b/CPV/cg_sub.f90
@@ -14,7 +14,7 @@
       lambdap, lambda  )
 
       use kinds, only: dp
-      use control_flags, only: iprint, thdyn, tpre, tbuff, iprsta, &
+      use control_flags, only: iprint, thdyn, tpre, iprsta, &
             tfor, tvlocw, taurdr, tprnfor
       use control_flags, only: ndr, ndw, nbeg, nomore, tsde, tortho, tnosee, &
             tnosep, trane, tranp, tsdp, tcp, tcap, ampre, amprp, tnoseh
diff --git a/CPV/chargedensity.f90 b/CPV/chargedensity.f90
index 2780faf19..bbf3f4f76 100644
--- a/CPV/chargedensity.f90
+++ b/CPV/chargedensity.f90
@@ -32,7 +32,7 @@
 !  end of module-scope declarations
 !  ----------------------------------------------
 
-        PUBLIC :: rhoofr, fillgrad
+        PUBLIC :: rhoofr, fillgrad, checkrho
 
         INTERFACE rhoofr
            MODULE PROCEDURE rhoofr_fpmd, rhoofr_cp
@@ -359,6 +359,7 @@
   END SUBROUTINE rhoofr_fpmd
 !=----------------------------------------------------------------------=!
 
+
 !-----------------------------------------------------------------------
    SUBROUTINE rhoofr_cp (nfi,c,irb,eigrb,bec,rhovan,rhor,rhog,rhos,enl,ekin)
 !-----------------------------------------------------------------------
@@ -374,7 +375,7 @@
 !     e_v = sum_i,ij rho_i,ij d^ion_is,ji
 !
       USE kinds,              ONLY: DP
-      USE control_flags,      ONLY: iprint, tbuff, iprsta, thdyn, tpre, trhor
+      USE control_flags,      ONLY: iprint, iprsta, thdyn, tpre, trhor
       USE ions_base,          ONLY: nat
       USE gvecp,              ONLY: ngm
       USE gvecs,              ONLY: ngs, nps, nms
@@ -511,35 +512,24 @@
             END DO
 
             CALL invfft('Wave',psis,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
-
-            !     wavefunctions in unit 21
             !
-            IF(tbuff) WRITE(21,iostat=ios) psis
-            iss1=ispin(i)
-            sa1=f(i)/omega
-            IF (i.NE.n) THEN
-               iss2=ispin(i+1)
-               sa2=f(i+1)/omega
+            iss1 = ispin(i)
+            sa1  = f(i) / omega
+            IF ( i .NE. n ) THEN
+               iss2 = ispin(i+1)
+               sa2  = f(i+1) / omega
             ELSE
-               iss2=iss1
-               sa2=0.0
+               iss2 = iss1
+               sa2  = 0.0
             END IF
-            DO ir=1,nnrsx
-               rhos(ir,iss1)=rhos(ir,iss1) + sa1*( DBLE(psis(ir)))**2
-               rhos(ir,iss2)=rhos(ir,iss2) + sa2*(AIMAG(psis(ir)))**2
-            END DO
-
             !
-            !       buffer 21
-            !     
-            IF(tbuff) THEN
-               IF(ios.NE.0) CALL errore(' rhoofr',' error in writing unit 21',ios)
-            ENDIF
+            DO ir = 1, nnrsx
+               rhos(ir,iss1) = rhos(ir,iss1) + sa1 * ( DBLE(psis(ir)))**2
+               rhos(ir,iss2) = rhos(ir,iss2) + sa2 * (AIMAG(psis(ir)))**2
+            END DO
             !
          END DO
          !
-         IF(tbuff) REWIND 21
-         !
          !     smooth charge in g-space is put into rhog(ig)
          !
          IF(nspin.EQ.1)THEN
@@ -597,39 +587,15 @@
                rhor(ir,isdw)=AIMAG(psi(ir))
             END DO
          ENDIF
-         IF (dft_is_meta()) CALL kedtauofr_meta(c, psi, psis) ! METAGGA
-!
-         IF(iprsta.GE.3)THEN
-            DO iss=1,nspin
-               rsumg(iss)=omega*DBLE(rhog(1,iss))
-               rsumr(iss)=SUM(rhor(:,iss))*omega/DBLE(nr1*nr2*nr3)
-            END DO
-
-            IF ( gstart /= 2 ) THEN
-               !
-               !    in the parallel case, only one processor has G=0 ! 
-               !
-               DO iss=1,nspin
-                  rsumg(iss)=0.0
-               END DO
-            END IF
-            CALL mp_sum( rsumg( 1:nspin ), intra_image_comm )
-            CALL mp_sum( rsumr( 1:nspin ), intra_image_comm )
-
-            IF ( nspin == 1 ) THEN
-              WRITE( stdout, 10) rsumg(1), rsumr(1)
-            ELSE
-              WRITE( stdout, 20) rsumg(1), rsumr(1), rsumg(2), rsumr(2)
-            ENDIF
-
-         ENDIF
+         !
+         IF ( dft_is_meta() ) CALL kedtauofr_meta( c, psi, psis ) ! METAGGA
          !
          !     add vanderbilt contribution to the charge density
          !     drhov called before rhov because input rho must be the smooth part
          !
-         IF (tpre) CALL drhov(irb,eigrb,rhovan,rhog,rhor)
+         IF ( tpre ) CALL drhov( irb, eigrb, rhovan, rhog, rhor )
          !
-         CALL rhov(irb,eigrb,rhovan,rhog,rhor)
+         CALL rhov( irb, eigrb, rhovan, rhog, rhor )
 
          rhopr = rhor
 
@@ -639,31 +605,18 @@
 !
 !     here to check the integral of the charge density
 !
-!
-      IF( iprsta .GE. 2 ) THEN
-         CALL checkrho(nnrx,nspin,rhor,rmin,rmax,rsum,rnegsum)
-         rnegsum=rnegsum*omega/DBLE(nr1*nr2*nr3)
-         rsum=rsum*omega/DBLE(nr1*nr2*nr3)
-         WRITE( stdout,'(a,4(1x,f12.6))')                                     &
+      IF( ( iprsta >= 2 ) .OR. ( nfi == 0 ) .OR. &
+          ( MOD(nfi, iprint) == 0 ) .AND. ( .NOT. tcg ) ) THEN
+
+         IF( iprsta >= 2 ) THEN
+            CALL checkrho( nnrx, nspin, rhor, rmin, rmax, rsum, rnegsum )
+            rnegsum = rnegsum * omega / DBLE(nr1*nr2*nr3)
+            rsum    = rsum    * omega / DBLE(nr1*nr2*nr3)
+            WRITE( stdout,'(a,4(1x,f12.6))')                                     &
      &     ' rhoofr: rmin rmax rnegsum rsum  ',rmin,rmax,rnegsum,rsum
-      END IF
-!
-      IF( nfi == 0 .OR. MOD(nfi, iprint) == 0 .AND. .NOT. tcg ) THEN
-
-         DO iss=1,nspin
-            rsumg(iss)=omega*DBLE(rhog(1,iss))
-            rsumr(iss)=SUM(rhor(:,iss),1)*omega/DBLE(nr1*nr2*nr3)
-         END DO
-
-         IF (gstart.NE.2) THEN
-            ! in the parallel case, only one processor has G=0 ! 
-            DO iss=1,nspin
-               rsumg(iss)=0.0
-            END DO
          END IF
 
-         CALL mp_sum( rsumg( 1:nspin ), intra_image_comm )
-         CALL mp_sum( rsumr( 1:nspin ), intra_image_comm )
+         CALL sum_charge( rsumg, rsumr )
 
          IF ( nspin == 1 ) THEN
            WRITE( stdout, 10) rsumg(1), rsumr(1)
@@ -688,7 +641,38 @@
 
 !
       RETURN
-      END SUBROUTINE rhoofr_cp
+
+
+   CONTAINS   
+      !
+      !
+      SUBROUTINE sum_charge( rsumg, rsumr )
+         !
+         REAL(DP), INTENT(OUT) :: rsumg( : )
+         REAL(DP), INTENT(OUT) :: rsumr( : )
+         INTEGER :: iss
+         !
+         DO iss=1,nspin
+            rsumg(iss)=omega*DBLE(rhog(1,iss))
+            rsumr(iss)=SUM(rhor(:,iss),1)*omega/DBLE(nr1*nr2*nr3)
+         END DO
+
+         IF (gstart.NE.2) THEN
+            ! in the parallel case, only one processor has G=0 !
+            DO iss=1,nspin
+               rsumg(iss)=0.0
+            END DO
+         END IF
+
+         CALL mp_sum( rsumg( 1:nspin ), intra_image_comm )
+         CALL mp_sum( rsumr( 1:nspin ), intra_image_comm )
+
+         RETURN
+      END SUBROUTINE
+
+!-----------------------------------------------------------------------
+   END SUBROUTINE rhoofr_cp
+!-----------------------------------------------------------------------
 
 
 
@@ -758,6 +742,44 @@
     END SUBROUTINE fillgrad
 
 
+!
+!----------------------------------------------------------------------
+      SUBROUTINE checkrho(nnr,nspin,rhor,rmin,rmax,rsum,rnegsum)
+!----------------------------------------------------------------------
+!
+!     check \int rho(r)dr and the negative part of rho
+!
+      USE kinds,     ONLY: DP
+      USE mp,        ONLY: mp_sum
+      USE mp_global, ONLY: intra_image_comm
+
+      IMPLICIT NONE
+
+      INTEGER nnr, nspin
+      REAL(DP) rhor(nnr,nspin), rmin, rmax, rsum, rnegsum
+      !
+      REAL(DP) roe
+      INTEGER ir, iss
+!
+      rsum   =0.0
+      rnegsum=0.0
+      rmin   =100.
+      rmax   =0.0 
+      DO iss = 1, nspin
+         DO ir = 1, nnr
+            roe  = rhor(ir,iss)
+            rsum = rsum + roe
+            IF ( roe < 0.0 ) rnegsum = rnegsum + roe
+            rmax = MAX( rmax, roe )
+            rmin = MIN( rmin, roe )
+         END DO
+      END DO
+      CALL mp_sum( rsum, intra_image_comm )
+      CALL mp_sum( rnegsum, intra_image_comm )
+      RETURN
+    END SUBROUTINE checkrho
+
+
 !=----------------------------------------------------------------------=!
       END MODULE charge_density
 !=----------------------------------------------------------------------=!
diff --git a/CPV/cplib.f90 b/CPV/cplib.f90
index 6cc0bfa7c..af92461a6 100644
--- a/CPV/cplib.f90
+++ b/CPV/cplib.f90
@@ -501,7 +501,7 @@
 !              sum_i,ij d^q_i,ij (-i)**l beta_i,i(g) 
 !                                 e^-ig.r_i < beta_i,j | c_n >}
       USE kinds, ONLY: dp
-      USE control_flags, ONLY: iprint, tbuff
+      USE control_flags, ONLY: iprint
       USE gvecs
       USE gvecw, ONLY: ngw
       USE cvan, ONLY: ish
@@ -546,26 +546,14 @@
 !
       ci=(0.0,1.0)
 !
-      IF (.NOT.tbuff) THEN
-!
-         psi (:) = (0.d0, 0.d0)
-         DO ig=1,ngw
+      psi (:) = (0.d0, 0.d0)
+      DO ig=1,ngw
             psi(nms(ig))=CONJG(c(ig)-ci*ca(ig))
             psi(nps(ig))=c(ig)+ci*ca(ig)
-         END DO
-!
-         CALL invfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
+      END DO
+
+      CALL invfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
 !     
-      ELSE
-!
-!     read psi from buffer 21
-!
-         READ(21,iostat=ios) psi
-         IF(ios.NE.0) CALL errore                                        &
-     &       (' dforce',' error in reading unit 21',ios)
-!
-      ENDIF
-! 
       iss1=ispin(i)
 !
 ! the following avoids a potential out-of-bounds error
@@ -796,26 +784,27 @@
             END DO
          END DO
       END DO
-!
-      IF (nvb.EQ.0) RETURN
-!
+
+      IF ( nvb == 0 ) RETURN
+
       ALLOCATE( v( nnr ) )
       ALLOCATE( qv( nnrb ) )
       ALLOCATE( dqgbt( ngb, 2 ) )
 
-      ci=(0.,1.)
-!
-      IF(nspin.EQ.1) THEN
-!     ------------------------------------------------------------------
-!     nspin=1 : two fft at a time, one per atom, if possible
-!     ------------------------------------------------------------------
+      ci =( 0.0d0, 1.0d0 )
+
+      IF( nspin == 1 ) THEN
+         !  
+         !  nspin=1 : two fft at a time, one per atom, if possible
+         ! 
          DO i=1,3
             DO j=1,3
-!
+
                v(:) = (0.d0, 0.d0)
-!
+
                iss=1
                isa=1
+
                DO is=1,nvb
 #ifdef __PARA
                   DO ia=1,na(is)
@@ -827,9 +816,9 @@
 #endif
                      dqgbt(:,:) = (0.d0, 0.d0) 
                      IF (ia.EQ.na(is)) nfft=1
-!
-!  nfft=2 if two ffts at the same time are performed
-!
+                     !
+                     !  nfft=2 if two ffts at the same time are performed
+                     !
                      DO ifft=1,nfft
                         DO iv=1,nh(is)
                            DO jv=iv,nh(is)
@@ -848,9 +837,9 @@
                            END DO
                         END DO
                      END DO
-!     
-! add structure factor
-!
+                     !     
+                     ! add structure factor
+                     !
                      qv(:) = (0.d0, 0.d0)
                      IF(nfft.EQ.2) THEN
                         DO ig=1,ngb
@@ -869,36 +858,37 @@
                      ENDIF
 !
                      CALL invfft('Box',qv,nr1b,nr2b,nr3b,nr1bx,nr2bx,nr3bx,isa)
-!
-!  qv = US contribution in real space on box grid
-!       for atomic species is, real(qv)=atom ia, imag(qv)=atom ia+1
-!
-!  add qv(r) to v(r), in real space on the dense grid
-!
-                     CALL box2grid(irb(1,isa),1,qv,v)
+                     !
+                     !  qv = US contribution in real space on box grid
+                     !       for atomic species is, real(qv)=atom ia, imag(qv)=atom ia+1
+                     !
+                     !  add qv(r) to v(r), in real space on the dense grid
+                     !
+                     CALL box2grid( irb(1,isa), 1, qv, v )
                      IF (nfft.EQ.2) CALL box2grid(irb(1,isa+1),2,qv,v)
-  15                 isa=isa+nfft
+
+  15                 isa = isa + nfft
 !
                   END DO
                END DO
 !
                DO ir=1,nnr
-                  drhor(ir,iss,i,j)=drhor(ir,iss,i,j)+DBLE(v(ir))
+                  drhor(ir,iss,i,j) = drhor(ir,iss,i,j) + DBLE(v(ir))
                END DO
 !
-               CALL fwfft('Dense', v,nr1,nr2,nr3,nr1x,nr2x,nr3x)
+               CALL fwfft( 'Dense', v, nr1, nr2, nr3, nr1x, nr2x, nr3x )
 !
                DO ig=1,ng
-                  drhog(ig,iss,i,j)=drhog(ig,iss,i,j)+v(np(ig))
+                  drhog(ig,iss,i,j) = drhog(ig,iss,i,j) + v(np(ig))
                END DO
 !
             ENDDO
          ENDDO
 !
       ELSE
-!     ------------------------------------------------------------------
-!     nspin=2: two fft at a time, one for spin up and one for spin down
-!     ------------------------------------------------------------------
+         !
+         !     nspin=2: two fft at a time, one for spin up and one for spin down
+         ! 
          isup=1
          isdw=2
          DO i=1,3
@@ -929,9 +919,9 @@
                            END DO
                         END DO
                      END DO
-!     
-! add structure factor
-!
+                     !     
+                     ! add structure factor
+                     !
                      qv(:) = (0.d0, 0.d0)
                      DO ig=1,ngb
                         qv(npb(ig))= eigrb(ig,isa)*dqgbt(ig,1)        &
@@ -941,14 +931,16 @@
                      END DO
 !
                      CALL invfft('Box',qv,nr1b,nr2b,nr3b,nr1bx,nr2bx,nr3bx,isa)
-!
-!  qv is the now the US augmentation charge for atomic species is
-!  and atom ia: real(qv)=spin up, imag(qv)=spin down
-!
-!  add qv(r) to v(r), in real space on the dense grid
-!
+                     !
+                     !  qv is the now the US augmentation charge for atomic species is
+                     !  and atom ia: real(qv)=spin up, imag(qv)=spin down
+                     !
+                     !  add qv(r) to v(r), in real space on the dense grid
+                     !
                      CALL box2grid2(irb(1,isa),qv,v)
-  25                 isa=isa+1
+                     !
+  25                 isa = isa + 1
+                     !
                   END DO
                END DO
 !
@@ -2674,7 +2666,7 @@
       COMPLEX(DP), ALLOCATABLE :: v(:), vs(:)
       REAL(DP), ALLOCATABLE    :: gagb(:,:)
       !
-      REAL(DP) :: fion1( 3, nat )
+      REAL(DP), ALLOCATABLE :: fion1( :, : )
       REAL(DP), ALLOCATABLE :: stmp( :, : )
       !
       COMPLEX(DP), ALLOCATABLE :: self_vloc(:)
@@ -2682,6 +2674,7 @@
       REAL(DP)                 :: self_ehtet, fpibg
       LOGICAL                  :: ttsic
       REAL(DP)                 :: detmp( 3, 3 ), desr( 6 ), deps( 6 )
+      REAL(DP)                 :: detmp2( 3, 3 )
       REAL(DP)                 :: deht( 6 )
 !
       INTEGER, DIMENSION(6), PARAMETER :: alpha = (/ 1,2,3,2,3,3 /)
@@ -2709,15 +2702,11 @@
       !
       ttsic = ( ABS( self_interaction ) /= 0 )
       !
-      IF( tpre .AND. ttsic ) &
-      &   CALL errore( ' vofrho ', ' in cplib tpre and ttsic are not possible ', 1 )
-
       IF( ttsic ) ALLOCATE( self_vloc( ng ) )
       !
       !     first routine in which fion is calculated: annihilation
       !
       fion  = 0.d0
-      fion1 = 0.d0
       !
       !     forces on ions, ionic term in real space
       !
@@ -2727,7 +2716,7 @@
          !
          CALL r_to_s( tau0, stmp, na, nsp, ainv )
          !
-         CALL vofesr( 0, esr, desr, fion, stmp, tpre, h )
+         CALL vofesr( 1, esr, desr, fion, stmp, tpre, h )
          !
          call mp_sum( fion, intra_image_comm )
          !
@@ -2755,7 +2744,6 @@
             drhotmp( 1:ng, :, : ) = drhotmp( 1:ng, :, : ) + drhog( 1:ng, 2, :, : )
          ENDIF
       END IF
-
       !
       !     calculation local potential energy
       !
@@ -2769,17 +2757,23 @@
       epseu = wz * DBLE(SUM(vtemp))
       IF (gstart == 2) epseu=epseu-vtemp(1)
       CALL mp_sum( epseu, intra_image_comm )
+
       epseu = epseu * omega
+
 !
       IF( tpre ) THEN
          !
          CALL denps_box( drhotmp, sfac, vtemp, dps )
+         !
          CALL pseudo_stress( deps, 0.0d0, gagb, sfac, dvps, rhog, omega )
+         !
          call mp_sum( deps, intra_image_comm )
+         !
          DO k = 1, 6
             detmp( alpha(k), beta(k) ) = deps(k)
             detmp( beta(k), alpha(k) ) = detmp( alpha(k), beta(k) )
          END DO
+         !
          dps = dps + MATMUL( detmp(:,:), TRANSPOSE( ainv(:,:) ) )
          !
       END IF
@@ -2791,9 +2785,7 @@
      !
       self_ehtet = 0.d0  
       !
-      IF( ALLOCATED( self_vloc ) ) THEN
-         self_vloc = 0.d0 
-      END IF
+      IF( ttsic ) self_vloc = 0.d0 
 
       DO is=1,nsp
          DO ig=1,ngs
@@ -2842,11 +2834,16 @@
       END IF
       IF(tpre) DEALLOCATE(drhotmp)
 
-!     ===================================================================
-!     forces on ions, ionic term in reciprocal space
-!     -------------------------------------------------------------------
-      IF( tprnfor .OR. tfor .OR. tpre)                                                  &
-     &    CALL force_ps(rhotmp,rhog,vtemp,ei1,ei2,ei3,fion1)
+      !    
+      !     forces on ions, ionic term in reciprocal space
+      !     
+      ALLOCATE( fion1( 3, nat ) )
+      !
+      fion1 = 0.d0
+      !
+      IF( tprnfor .OR. tfor .OR. tpre) THEN
+          CALL force_ps(rhotmp,rhog,vtemp,ei1,ei2,ei3,fion1)
+      END IF
 
       !
       !     calculation hartree + local pseudo potential
@@ -2930,8 +2927,10 @@
          fion = fion + fion1
 
       END IF
+
+      DEALLOCATE( fion1 )
 !
-      IF( ALLOCATED( self_vloc ) ) DEALLOCATE( self_vloc )
+      IF( ttsic ) DEALLOCATE( self_vloc )
 !
 !     ===================================================================
 !     fourier transform of total potential to r-space (dense grid)
@@ -3093,42 +3092,3 @@
 !
 
       END SUBROUTINE vofrho
-
-!
-!----------------------------------------------------------------------
-      SUBROUTINE checkrho(nnr,nspin,rhor,rmin,rmax,rsum,rnegsum)
-!----------------------------------------------------------------------
-!
-!     check \int rho(r)dr and the negative part of rho
-!
-      USE kinds,     ONLY: DP
-      USE mp,        ONLY: mp_sum
-      USE mp_global, ONLY: intra_image_comm
-
-      IMPLICIT NONE
-
-      INTEGER nnr, nspin
-      REAL(DP) rhor(nnr,nspin), rmin, rmax, rsum, rnegsum
-      !
-      REAL(DP) roe
-      INTEGER ir, iss
-!
-      rsum   =0.0
-      rnegsum=0.0
-      rmin   =100.
-      rmax   =0.0
-      DO iss = 1, nspin
-         DO ir = 1, nnr
-            roe  = rhor(ir,iss)
-            rsum = rsum + roe
-            IF ( roe < 0.0 ) rnegsum = rnegsum + roe
-            rmax = MAX( rmax, roe )
-            rmin = MIN( rmin, roe )
-         END DO
-      END DO
-      CALL mp_sum( rsum, intra_image_comm )
-      CALL mp_sum( rnegsum, intra_image_comm )
-      RETURN
-    END SUBROUTINE checkrho
-!______________________________________________________________________
-
diff --git a/CPV/cpr.f90 b/CPV/cpr.f90
index a199d0aed..d84361296 100644
--- a/CPV/cpr.f90
+++ b/CPV/cpr.f90
@@ -13,7 +13,7 @@ SUBROUTINE cprmain( tau, fion_out, etot_out )
   !
   USE kinds,                    ONLY : DP
   USE constants,                ONLY : bohr_radius_angs, amu_au
-  USE control_flags,            ONLY : iprint, isave, thdyn, tpre, tbuff,      &
+  USE control_flags,            ONLY : iprint, isave, thdyn, tpre,       &
                                        iprsta, tfor, tvlocw,      &
                                        taurdr, tprnfor, tsdc, lconstrain, lwf, &
                                        lneb, lcoarsegrained, ndr, ndw, nomore, &
diff --git a/CPV/cprsub.f90 b/CPV/cprsub.f90
index 9f7b1f170..16cd0a440 100644
--- a/CPV/cprsub.f90
+++ b/CPV/cprsub.f90
@@ -183,7 +183,7 @@ subroutine nlfh( bec, dbec, lambda )
   !
   USE kinds,          ONLY : DP
   use cvan,           ONLY : nvb, ish
-  use uspp,           ONLY : nhsa => nkb, qq
+  use uspp,           ONLY : nkb, qq
   use uspp_param,     ONLY : nh, nhm
   use ions_base,      ONLY : na
   use electrons_base, ONLY : nbspx, nbsp, nudx, nspin, nupdwn, iupdwn
@@ -195,11 +195,12 @@ subroutine nlfh( bec, dbec, lambda )
 !
   implicit none
 
-  real(DP), intent(in) ::  bec( nhsa, nbsp ), dbec( nhsa, nbsp, 3, 3 )
+  real(DP), intent(in) ::  bec( nkb, nbsp ), dbec( nkb, nbsp, 3, 3 )
   real(DP), intent(in) ::  lambda( nudx, nudx, nspin )
 !
-  integer   :: i, j, ii, jj, inl, iv, jv, ia, is, iss, nss, istart
-  real(DP) :: fpre(3,3)
+  INTEGER  :: i, j, ii, jj, inl, iv, jv, ia, is, iss, nss, istart
+  INTEGER  :: jnl
+  REAL(DP) :: fpre(3,3), TT, T1, T2
   !
   REAL(DP), ALLOCATABLE :: tmpbec(:,:), tmpdh(:,:), temp(:,:)
   !
@@ -208,6 +209,7 @@ subroutine nlfh( bec, dbec, lambda )
       fpre(:,:) = 0.d0
       do ii=1,3
          do jj=1,3
+
             do is=1,nvb
                do ia=1,na(is)
 !
@@ -241,30 +243,27 @@ subroutine nlfh( bec, dbec, lambda )
                      if(nh(is).gt.0)then
 
                         temp = 0.d0
-!
+
                         call MXMA                                          &
      &                    (tmpdh,1,nudx,tmpbec,1,nhm,temp,1,nudx,nss,nh(is),nss)
-!
-                        ! do j=1,nss
-                        !    do i=1,nss
-                        !       temp(i,j)=temp(i,j)*lambda(i,j,iss)
-                        !    end do
-                        ! end do
-                        ! fpre(ii,jj)=fpre(ii,jj)+2.*SUM(temp(1:nss,1:nss))
-                        !
-                        DO j = 1, nss
-                           DO i = 1, nss
-                              fpre(ii,jj) = fpre(ii,jj) + 2.0d0*temp(i,j)*lambda(i,j,iss)
-                           END DO
-                        END DO
 
+                        do j=1,nss
+                           do i=1,nss
+                              temp(i,j)=temp(i,j)*lambda(i,j,iss)
+                           end do
+                        end do
+                        fpre(ii,jj)=fpre(ii,jj)+2.0d0*SUM(temp(1:nss,1:nss))
+                       
                      endif
                      !
                   end do
                   !
                end do
+               !
             end do
+            !
          end do
+         !
       end do
 
       do i=1,3
@@ -274,14 +273,17 @@ subroutine nlfh( bec, dbec, lambda )
          enddo
       enddo
 
+      DEALLOCATE ( tmpbec, tmpdh, temp )
+
       IF( iprsta >= 2 ) THEN
+         WRITE( stdout,*) 
          WRITE( stdout,*) "constraints contribution to stress"
          WRITE( stdout,5555) ((-fpre(i,j),j=1,3),i=1,3)
          fpre = MATMUL( fpre, TRANSPOSE( h ) ) / omega * au_gpa * 10.0d0
          WRITE( stdout,5555) ((fpre(i,j),j=1,3),i=1,3)
+         WRITE( stdout,*) 
       END IF
 !
-      DEALLOCATE ( tmpbec, tmpdh, temp )
 
 5555  FORMAT(1x,f12.5,1x,f12.5,1x,f12.5/                                &
      &       1x,f12.5,1x,f12.5,1x,f12.5/                                &
@@ -319,8 +321,7 @@ subroutine nlinit
       use core,            ONLY : rhocb, nlcc_any, allocate_core
       use constants,       ONLY : pi, fpi
       use ions_base,       ONLY : na, nsp
-      use uspp,            ONLY : aainit, beta, qq, dvan, nhtol, nhtolm, indv,&
-                                  nhsa => nkb, nhsavb=>nkbus
+      use uspp,            ONLY : aainit, beta, qq, dvan, nhtol, nhtolm, indv
       use uspp_param,      ONLY : kkbeta, qqq, nqlc, betar, lmaxq, dion,&
                                   nbeta, nbetam, lmaxkb, lll, nhm, nh, tvanp
       use atom,            ONLY : mesh, r, rab, nlcc, numeric
@@ -533,14 +534,18 @@ subroutine dqvan2b(ngy,iv,jv,is,ylm,dylm,dqg)
 
   ivs=indv(iv,is)
   jvs=indv(jv,is)
+  !
   if (ivs >= jvs) then
      ijvs = ivs*(ivs-1)/2 + jvs
   else
      ijvs = jvs*(jvs-1)/2 + ivs
   end if
+  !
   ! ijvs is the packed index for (ivs,jvs)
+  !
   ivl=nhtolm(iv,is)
   jvl=nhtolm(jv,is)
+  !
   if (ivl > nlx .OR. jvl > nlx) &
        call errore (' qvan2 ', ' wrong dimensions (2)', MAX(ivl,jvl))
   !
@@ -578,15 +583,16 @@ subroutine dqvan2b(ngy,iv,jv,is,ylm,dylm,dqg)
      !     
      !       sig= (-i)^l
      !
-     sig=(0.,-1.)**(l-1)
-     sig=sig*ap(lp,ivl,jvl) 
-     ! WRITE(200,'(3I4,2D14.6)') ijvs,l,is,SUM(qradb(:,ijvs,l,is))
+     sig = (0.0d0,-1.0d0)**(l-1)
+     !
+     sig = sig * ap( lp, ivl, jvl ) 
+     !
      do ij=1,3
         do ii=1,3
            do ig=1,ngy
               dqg(ig,ii,ij) = dqg(ig,ii,ij) +  sig *                &
  &                    ( ylm(ig,lp) * dqrad(ig,ijvs,l,is,ii,ij) -    &
- &                     dylm(ig,lp,ii,ij)*qradb(ig,ijvs,l,is)   ) ! SEGNO
+ &                     dylm(ig,lp,ii,ij) * qradb(ig,ijvs,l,is)   ) ! SEGNO
            end do
         end do
      end do
diff --git a/CPV/exch_corr.f90 b/CPV/exch_corr.f90
index 2a2582dc2..f66a00351 100644
--- a/CPV/exch_corr.f90
+++ b/CPV/exch_corr.f90
@@ -181,29 +181,29 @@
       IF ( ANY( tnlcc ) ) THEN
 
         DO ig = gstart, ngm
-          tex1 = (0.0_DP , 0.0_DP)
+          tex1 = ( 0.0d0 , 0.0d0 )
           DO is=1,nsp
             IF ( tnlcc(is) ) THEN
               tex1 = tex1 + sfac( ig, is ) * CMPLX(rhocp(ig,is), 0.d0)
             END IF
           END DO
-          tex2 = 0.0_DP
+          tex2 = 0.0d0
           DO ispin = 1, nspin
             tex2 = tex2 + CONJG( vxc(ig, ispin) )
           END DO
           tex3 = DBLE(tex1 * tex2) / SQRT( g( ig ) ) / tpiba
           dcc = dcc + tex3 * gagb(:,ig)
         END DO
-        dcc = dcc * 2.0_DP * omega
+        dcc = dcc * 2.0d0 * omega
 
         ! DEBUG
-        ! DO k=1,6
-        !   detmp(alpha(k),beta(k)) = dcc(k)
-        !   detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
-        ! END DO
-        ! detmp = MATMUL( detmp(:,:), box%m1(:,:) )
-        ! WRITE( stdout,*) "derivative of e(xc) - nlcc part"
-        ! WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
+        !DO k=1,6
+        !  detmp(alpha(k),beta(k)) = dcc(k)
+        !  detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
+        !END DO
+        !detmp = MATMUL( detmp(:,:), box%m1(:,:) )
+        !WRITE( stdout,*) "derivative of e(xc) - nlcc part"
+        !WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
 
 
       END IF
@@ -391,10 +391,13 @@
       self_exc = 0.d0
 !
       if( dft_is_meta() ) then
+         !
          call tpssmeta( nnr, nspin, gradr, rhor, kedtaur, exc )
+         !
       else
+         !
          CALL exch_corr_cp(nnr, nspin, gradr, rhor, exc)
-!
+         !
          IF ( ttsic ) THEN
             CALL exch_corr_cp(nnr, nspin, self_gradr, self_rho, self_exc)
             self_exc = sic_alpha * (exc - self_exc)
@@ -415,14 +418,8 @@
       !
       dxc = 0.0d0
       !
-      if (tpre) then
+      if ( tpre ) then
          !
-         IF( nlcc_any ) CALL  denlcc( nnr, nspin, rhor, sfac, drhocg, dcc )
-         !
-         ! DEBUG
-         !
-         ! write (stdout,*) "derivative of e(xc) - nlcc part"
-         ! write (stdout,5555) ((dcc(i,j),j=1,3),i=1,3)
          !
          do iss = 1, nspin
             do j=1,3
@@ -432,26 +429,15 @@
                   end do
                end do
             end do
-            drc = 0.0d0
-            IF( nlcc_any ) THEN
-               do j=1,3
-                  do i=1,3
-                     do ir=1,nnr
-                        drc(i,j) = drc(i,j) + rhor( ir, iss ) * rhoc( ir ) * ainv(j,i)
-                     end do
-                  end do
-               end do
-            END IF
-            dxc = dxc - drc * 1.0d0 / nspin
          end do
          !
          dxc = dxc * omega / ( nr1*nr2*nr3 )
          !
          call mp_sum ( dxc, intra_image_comm )
          !
-         do j=1,3
-            do i=1,3
-               dxc(i,j) = dxc(i,j) + exc * ainv(j,i)
+         do j = 1, 3
+            do i = 1, 3
+               dxc( i, j ) = dxc( i, j ) + exc * ainv( j, i )
             end do
          end do
          !
@@ -460,21 +446,19 @@
          ! write (stdout,*) "derivative of e(xc)"
          ! write (stdout,5555) ((dxc(i,j),j=1,3),i=1,3)
          !
-         dxc = dxc + dcc
+         IF( iprsta >= 2 ) THEN
+            DO i=1,3
+               DO j=1,3
+                  detmp(i,j)=exc*ainv(j,i)
+               END DO
+            END DO
+            WRITE( stdout,*) "derivative of e(xc) - diag - kbar"
+            detmp = -1.0d0 * MATMUL( detmp, TRANSPOSE( h ) ) / omega * au_gpa * 10.0d0
+            WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
+         END IF
          !
       end if
       !
-      IF( iprsta >= 2 ) THEN
-         DO i=1,3
-            DO j=1,3
-               detmp(i,j)=exc*ainv(j,i)
-            END DO
-         END DO
-         WRITE( stdout,*) "derivative of e(xc) - diag - kbar"
-         detmp = -1.0d0 * MATMUL( detmp, TRANSPOSE( h ) ) / omega * au_gpa * 10.0d0
-         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
-      END IF
-      !
       !
       if (dft_is_gradient()) then
          !
@@ -496,11 +480,10 @@
 
       IF( ttsic ) THEN
 !
-      !
          IF (dft_is_gradient()) then
       
             call gradh( nspin, self_gradr, self_rhog, self_rho, dexc)
-         !
+          
             gradr(:,:, 1) = (1.d0 - sic_alpha ) * gradr(:,:, 1)
             gradr(:,:, 2) = (1.d0 - sic_alpha ) * gradr(:,:, 2) + &
                           &  sic_alpha * ( self_gradr(:,:,1) + self_gradr(:,:,2) )
@@ -516,6 +499,36 @@
 !
       ENDIF
 
+      IF( tpre ) THEN
+         !
+         dcc = 0.0d0
+         !
+         IF( nlcc_any ) CALL  denlcc( nnr, nspin, rhor, sfac, drhocg, dcc )
+         !
+         ! DEBUG
+         !
+         ! write (stdout,*) "derivative of e(xc) - nlcc part"
+         ! write (stdout,5555) ((dcc(i,j),j=1,3),i=1,3)
+         !
+         dxc = dxc + dcc
+         !
+         do iss = 1, nspin
+            drc = 0.0d0
+            IF( nlcc_any ) THEN
+               do j=1,3
+                  do i=1,3
+                     do ir=1,nnr
+                        drc(i,j) = drc(i,j) + rhor( ir, iss ) * rhoc( ir ) * ainv(j,i)
+                     end do
+                  end do
+               end do
+               call mp_sum ( drc, intra_image_comm )
+            END IF
+            dxc = dxc - drc * ( 1.0d0 / nspin ) * omega / ( nr1*nr2*nr3 )
+         end do
+         !
+      END IF
+
       IF( ALLOCATED( gradr ) ) DEALLOCATE( gradr )
 
 5555  format(1x,f12.5,1x,f12.5,1x,f12.5/                                &
diff --git a/CPV/inner_loop.f90 b/CPV/inner_loop.f90
index 32f00c9dc..9e09acc60 100644
--- a/CPV/inner_loop.f90
+++ b/CPV/inner_loop.f90
@@ -20,7 +20,7 @@
 
       ! declares modules
       USE kinds,          ONLY: dp
-      USE control_flags,  ONLY: iprint, thdyn, tpre, tbuff, iprsta, &
+      USE control_flags,  ONLY: iprint, thdyn, tpre, iprsta, &
                                 tfor, tvlocw, taurdr, &
                                 tprnfor, ndr, ndw, nbeg, nomore, &
                                 tsde, tortho, tnosee, tnosep, trane, &
diff --git a/CPV/move_electrons.f90 b/CPV/move_electrons.f90
index 0c4aaad65..62dc5ca42 100644
--- a/CPV/move_electrons.f90
+++ b/CPV/move_electrons.f90
@@ -170,12 +170,6 @@ SUBROUTINE move_electrons( nfi, tfirst, tlast, b1, b2, b3, fion, &
         lambdam = lambda
      END IF
      !
-     IF( force_pairing ) THEN
-        lambda( 1:nupdwn(2), 1:nupdwn(2), 2 ) =  lambda(1:nupdwn(2), 1:nupdwn(2), 1 )
-        lambdam( 1:nupdwn(2), 1:nupdwn(2), 2 ) = lambdam(1:nupdwn(2), 1:nupdwn(2), 1 )
-        lambdap( 1:nupdwn(2), 1:nupdwn(2), 2 ) = lambdap(1:nupdwn(2), 1:nupdwn(2), 1 )
-     ENDIF
-     ! 
      ! ... calphi calculates phi
      ! ... the electron mass rises with g**2
      !
diff --git a/CPV/nl_base.f90 b/CPV/nl_base.f90
index 479879d0c..b4f9537bd 100644
--- a/CPV/nl_base.f90
+++ b/CPV/nl_base.f90
@@ -494,9 +494,9 @@
 !
 !
       call start_clock( 'calbec' )
-      call nlsm1(n,nspmn,nspmx,eigr,c,bec)
+      call nlsm1( n, nspmn, nspmx, eigr, c, bec )
 !
-      if (iprsta.gt.2) then
+      if ( iprsta > 2 ) then
          WRITE( stdout,*)
          do is=1,nspmx
             if(nspmx.gt.1) then
@@ -590,6 +590,8 @@ SUBROUTINE caldbec( ngw, nkb, n, nspmn, nspmx, eigr, c, dbec, tred )
                  ixi = 1
                  signre = -1.0
                  signim =  1.0
+              else
+                 CALL errore(' caldbec  ', ' l not implemented ', ABS( l ) )
               endif
               !
               do ia=1,na(is)
@@ -636,13 +638,13 @@ subroutine dennl( bec, denl )
   USE kinds,      ONLY : DP
   use cvan,       only : ish
   use uspp_param, only : nh
-  use uspp,       only : nkb, dvan
+  use uspp,       only : nkb, dvan, deeq
   use cdvan,      ONLY : drhovan, dbec
   use ions_base,  only : nsp, na
   use cell_base,  only : h
   use io_global,  only : stdout
   !
-  use electrons_base,     only : n => nbsp, ispin, f, nspin
+  use electrons_base,     only : n => nbsp, ispin, f, nspin, ispin
   use reciprocal_vectors, only : gstart
 
   implicit none
@@ -677,17 +679,18 @@ subroutine dennl( bec, denl )
                     enddo
                  enddo
               end do
-              dsum=0.d0
+              !
               do iss=1,nspin
+                 dsum=0.d0
                  do k=1,3
                     do j=1,3
                        drhovan(ijv,isa,iss,j,k)=dsums(iss,j,k)
                        dsum(j,k)=dsum(j,k)+dsums(iss,j,k)
                     enddo
                  enddo
+                 if(iv.ne.jv) dsum=2.d0*dsum
+                 denl = denl + dsum * dvan(jv,iv,is)
               end do
-              if(iv.ne.jv) dsum=2.d0*dsum
-              denl = denl + dsum*dvan(jv,iv,is)
            end do
         end do
      end do
diff --git a/CPV/nlcc.f90 b/CPV/nlcc.f90
index 4e216c4a0..0facd65f1 100644
--- a/CPV/nlcc.f90
+++ b/CPV/nlcc.f90
@@ -464,10 +464,10 @@
                   end do
                end if
 !
-               call invfft('Box',qv,nr1b,nr2b,nr3b,nr1bx,nr2bx,nr3bx,isa)
-!
-! note that a factor 1/2 is hidden in fac if nspin=2
-!
+               call invfft('Box',qv,nr1b,nr2b,nr3b,nr1bx,nr2bx,nr3bx,ia+isa)
+               !
+               ! note that a factor 1/2 is hidden in fac if nspin=2
+               !
                do iss=1,nspin
                   fcc(ix,ia+isa) = fcc(ix,ia+isa) + fac *               &
      &                 boxdotgrid(irb(1,ia  +isa),1,qv,vxc(1,iss))
@@ -495,7 +495,7 @@
 
 !
 !-----------------------------------------------------------------------
-      subroutine set_cc(irb,eigrb,rhoc)
+      subroutine set_cc( irb, eigrb, rhoc )
 !-----------------------------------------------------------------------
 !
 !     Calculate core charge contribution in real space, rhoc(r)
diff --git a/CPV/ortho_base.f90 b/CPV/ortho_base.f90
index b9303200c..66878f122 100644
--- a/CPV/ortho_base.f90
+++ b/CPV/ortho_base.f90
@@ -935,8 +935,8 @@ CONTAINS
 !
       IF ( iprsta > 2 ) THEN
          WRITE( stdout,*)
-         DO is=1,nsp
-            IF(nsp.GT.1) THEN
+         DO is = 1, nvb
+            IF( nvb .GT. 1 ) THEN
                WRITE( stdout,'(33x,a,i4)') ' updatc: bec (is)',is
                WRITE( stdout,'(8f9.4)')                                       &
      &            ((bec(ish(is)+(iv-1)*na(is)+1,i+istart-1),iv=1,nh(is)),i=1,nss)
@@ -1070,8 +1070,8 @@ CONTAINS
 
          WRITE( stdout,*) 'in calphi sqrt(emtot)=',SQRT(emtot)
          WRITE( stdout,*)
-         DO is=1,nsp
-            IF(nsp > 1) THEN
+         DO is = 1, nvb
+            IF( nvb > 1 ) THEN
                WRITE( stdout,'(33x,a,i4)') ' calphi: bec (is)',is
                WRITE( stdout,'(8f9.4)')                                       &
      &            ((bec(ish(is)+(iv-1)*na(is)+1,i),iv=1,nh(is)),i=1,n)
diff --git a/CPV/pseudopot.f90 b/CPV/pseudopot.f90
index 7a93f5caa..f1d0b658c 100644
--- a/CPV/pseudopot.f90
+++ b/CPV/pseudopot.f90
@@ -727,8 +727,9 @@ CONTAINS
       !
       IF( ALLOCATED( betagx  ) ) DEALLOCATE( betagx )
       IF( ALLOCATED( dbetagx ) ) DEALLOCATE( dbetagx )
+      !
       ALLOCATE( betagx ( mmx, nhm, nsp ) )
-      IF (tpre) ALLOCATE( dbetagx( mmx, nhm, nsp ) )
+      IF ( tpre ) ALLOCATE( dbetagx( mmx, nhm, nsp ) )
 
       !
       do is = 1, nsp
@@ -738,6 +739,7 @@ CONTAINS
             allocate( djl  ( kkbeta( is ) ) )
             allocate( jltmp( kkbeta( is ) ) )
          end if
+         !
          allocate( fint ( kkbeta( is ) ) )
          allocate( jl   ( kkbeta( is ) ) )
          !
@@ -758,21 +760,29 @@ CONTAINS
                   !
                   i0 = 1
                   if ( r(1,is) < 1.0d-8 ) i0 = 2
-                  ! special case q=0
+                  !
                   if ( xg < 1.0d-8 ) then
+                     !
+                     ! special case q=0
+                     !
                      if (l == 1) then
                         ! Note that dj_1/dx (x=0) = 1/3
                         jltmp(:) = 1.0d0/3.d0
                      else
                         jltmp(:) = 0.0d0
                      end if
+                     !
                   else
+                     !
                      call sph_bes (kkbeta(is)+1-i0, r(i0,is), xg, ltmp-1, jltmp )
+                     !
                   end if
+                  !
                   do ir = i0, kkbeta(is)
                      xrg = r(ir,is) * xg
                      djl(ir) = jltmp(ir) * xrg - l * jl(ir)
                   end do
+                  !
                   if ( i0 == 2 ) djl(1) = djl(2)
                   !
                endif
@@ -804,6 +814,7 @@ CONTAINS
 !
          deallocate(jl)
          deallocate(fint)
+         !
          if (tpre) then
             deallocate(jltmp)
             deallocate(djl)
@@ -874,7 +885,6 @@ CONTAINS
          ALLOCATE( qrl( nr, nbeta(is)*(nbeta(is)+1)/2, nqlc(is)) )
          !
          call fill_qrl ( is, qrl )
-         ! qrl = 0.0d0
          !
          do l = 1, nqlc( is )
             !
@@ -1152,7 +1162,6 @@ CONTAINS
                do ig=1,ngb
                   do l=1,nqlc(is)
                      qradb(ig,ijv,l,is)= c*qradx(ig,ijv,l,is)
-                     WRITE(100,'(4I4,2D14.6)') ig,ijv,l,is,qradb(ig,ijv,l,is)
                   enddo
                enddo
             enddo
@@ -1354,11 +1363,6 @@ CONTAINS
                   do j=1,3
                      dbeta( 1, iv, is, i, j ) = -0.5 * beta( 1, iv, is ) * ainv( j, i )    &
      &                                          - c * dylm( 1, lp, i, j ) * betagl         ! SEGNO
-                     
-                     !dbeta(1,iv,is,i,j)=-0.5*beta(1,iv,is)*ainv(j,i)  !DEBUG stress ok!
-                     !dbeta(1,iv,is,i,j)=-c*dylm(1,lp,i,j)*betagl  !DEBUG stress ok!
-                     !dbeta(1,iv,is,i,j)=0.0d0  !DEBUG stress
-
                   enddo
                enddo
                do ig = gstart, ngw
@@ -1375,11 +1379,6 @@ CONTAINS
      &                    - c * dylm( ig, lp, i, j ) * betagl                  &  ! SEGNO
      &                    - c * ylm ( ig, lp )       * dbetagl * gx( i, ig ) / g( ig )         &
      &                    * ( gx( 1, ig ) * ainv( j, 1 ) + gx( 2, ig ) * ainv( j, 2 ) + gx( 3, ig ) * ainv( j, 3 ) )
-                        !dbeta(ig,iv,is,i,j)=- 0.5d0 * beta( ig, iv, is ) * ainv( j, i )  !DEBUG stress ok!
-                        !dbeta(ig,iv,is,i,j)=- c * dylm( ig, lp, i, j ) * betagl  !DEBUG stress ok!
-                        !dbeta( ig, iv, is, i, j ) =                            &
-                        !  - c * ylm ( ig, lp )       * dbetagl * gx( i, ig ) / g( ig )         &
-                        !  * ( gx( 1, ig ) * ainv( j, 1 ) + gx( 2, ig ) * ainv( j, 2 ) + gx( 3, ig ) * ainv( j, 3 ) )
                      end do
                   end do
                end do
@@ -1451,7 +1450,10 @@ CONTAINS
          do iv= 1,nbeta(is)
             do jv = iv, nbeta(is)
                ijv = jv*(jv-1)/2 + iv
-               do ig=1,ngb
+               do l=1,nqlc(is)
+                  qradb(1,ijv,l,is) = c * qradx(1,ijv,l,is)
+               end do
+               do ig=2,ngb
                   gg=gb(ig)*tpibab*tpibab/refg
                   jj=int(gg)+1
                   do l=1,nqlc(is)
@@ -1495,7 +1497,7 @@ CONTAINS
          allocate(dqgbs(ngb,3,3))
          dqrad(:,:,:,:,:,:) = 0.d0
          !
-         call dylmr2_(lmaxq*lmaxq, ngb, gxb, gb, ainv, dylmb)
+         call dylmr2_( lmaxq*lmaxq, ngb, gxb, gb, ainv, dylmb )
          !
          do is=1,nvb
             !
@@ -1503,7 +1505,8 @@ CONTAINS
                do jv=iv,nbeta(is)
                   ijv = jv*(jv-1)/2 + iv
                   do l=1,nqlc(is)
-                     do ig=1,ngb
+                     dqradb(1,ijv,l,is) =  dqradx(1,ijv,l,is)
+                     do ig=2,ngb
                         gg=gb(ig)*tpibab*tpibab/refg
                         jj=int(gg)+1
                         if(jj.ge.mmx) then
@@ -1516,18 +1519,15 @@ CONTAINS
                      enddo
                      do i=1,3
                         do j=1,3
-                           dqrad(1,ijv,l,is,i,j) = &
-                                -qradb(1,ijv,l,is) * ainv(j,i)
-                           ! dqrad(1,ijv,l,is,i,j) = 0.d0 ! DEBUG
+                           dqrad(1,ijv,l,is,i,j) = - qradb(1,ijv,l,is) * ainv(j,i)
                            do ig=2,ngb
                               dqrad(ig,ijv,l,is,i,j) =                  &
-     &                          -qradb(ig,ijv,l,is)*ainv(j,i)           &
-     &                          -c*dqradb(ig,ijv,l,is)*                 &
+     &                          - qradb(ig,ijv,l,is)*ainv(j,i)          &  
+     &                          - c * dqradb(ig,ijv,l,is)*              &
      &                          gxb(i,ig)/gb(ig)*                       &
      &                          (gxb(1,ig)*ainv(j,1)+                   &
      &                           gxb(2,ig)*ainv(j,2)+                   &
      &                           gxb(3,ig)*ainv(j,3))
-                           ! dqrad(ig,ijv,l,is,i,j) = -qradb(ig,ijv,l,is)*ainv(j,i) ! DEBUG
                            enddo
                         enddo
                      enddo
@@ -1611,6 +1611,7 @@ CONTAINS
             allocate( djl  ( kkbeta( is ) ) )
             allocate( jltmp( kkbeta( is ) ) )
          end if
+         !
          allocate( fint ( kkbeta( is ) ) )
          allocate( jl   ( kkbeta( is ) ) )
          !
@@ -1677,6 +1678,7 @@ CONTAINS
 !
          deallocate(jl)
          deallocate(fint)
+         !
          if (tpre) then
             deallocate(jltmp)
             deallocate(djl)
@@ -1779,6 +1781,8 @@ CONTAINS
       IF ( kkbeta(is) > dim1 ) &
            CALL errore ('fill_qrl', 'bad 1st dimension for array qrl', 1)
       !
+      qrl = 0.0d0
+      !
       do iv = 1, nbeta(is)
          !
          do jv = iv, nbeta(is)
@@ -1793,8 +1797,10 @@ CONTAINS
             lmin = ABS (lll(jv,is) - lll(iv,is)) + 1
             lmax = lll(jv,is) + lll(iv,is) + 1
 
-            ! WRITE( stdout, * ) ' is, jv, iv = ', is, jv, iv
-            ! WRITE( stdout, * ) ' lll jv, iv = ', lll(jv,is), lll(iv,is)
+            ! WRITE( stdout, * ) 'QRL is, jv, iv = ', is, jv, iv
+            ! WRITE( stdout, * ) 'QRL lll jv, iv = ', lll(jv,is), lll(iv,is)
+            ! WRITE( stdout, * ) 'QRL lmin, lmax = ', lmin, lmax
+            ! WRITE( stdout, * ) '---------------- '
 
             IF ( lmin < 1 .OR. lmax > dim3) THEN
                  WRITE( stdout, * ) ' lmin, lmax = ', lmin, lmax
diff --git a/CPV/runcp.f90 b/CPV/runcp.f90
index 313faabcb..e962d67db 100644
--- a/CPV/runcp.f90
+++ b/CPV/runcp.f90
@@ -17,6 +17,7 @@
 
         PUBLIC :: runcp, runcp_force_pairing
         PUBLIC :: runcp_uspp, runcp_uspp_force_pairing, runcp_ncpp
+        PUBLIC :: runcp_uspp_bgl
 
 !=----------------------------------------------------------------------------=!
         CONTAINS
@@ -593,7 +594,7 @@
               fromscra, restart )
      !
      use wave_base, only: wave_steepest, wave_verlet
-     use control_flags, only: tbuff, lwf, tsde
+     use control_flags, only: lwf, tsde
      !use uspp, only : nhsa=> nkb, betae => vkb, rhovan => becsum, deeq
      use uspp, only : deeq, betae => vkb
      use reciprocal_vectors, only : gstart
@@ -687,20 +688,187 @@
      deallocate(c2)
      deallocate(c3)
 !
-!==== end of loop which updates electronic degrees of freedom
-!
-!     buffer for wavefunctions is unit 21
-!
-     if(tbuff) rewind 21
-
    END SUBROUTINE runcp_uspp
 !
+!
 !=----------------------------------------------------------------------------=!
+!
+!
+
+   SUBROUTINE runcp_uspp_bgl( nfi, fccc, ccc, ema0bg, dt2bye, rhos, bec, c0, cm, &
+              fromscra, restart )
+     !
+     use wave_base,              only: my_wave_steepest, my_wave_verlet
+     use control_flags,          only: lwf, tsde
+     use uspp,                   only: deeq, betae => vkb
+     use reciprocal_vectors,     only: gstart
+     use electrons_base,         only: n=>nbsp, nspin
+     use wannier_subroutines,    only: ef_potential
+     use efield_module,          only: dforce_efield, tefield, dforce_efield2, tefield2
+     use gvecw,                  only: ngw
+     use smooth_grid_dimensions, only: nr1s, nr2s, nr3s, nr1sx, nr2sx, nr3sx, nnrsx
+     USE fft_base,               ONLY: dffts
+     USE mp_global,              ONLY: me_image, nogrp, me_ogrp
+     USE parallel_include
+     use task_groups
+
+     !
+     IMPLICIT NONE
+     integer, intent(in) :: nfi
+     real(8) :: fccc, ccc
+     real(8) :: ema0bg(:), dt2bye
+     real(8) :: rhos(:,:)
+     real(8) :: bec(:,:)
+     complex(8) :: c0(:,:), cm(:,:)
+     logical, optional, intent(in) :: fromscra
+     logical, optional, intent(in) :: restart
+     !
+     real(8) ::  verl1, verl2, verl3
+     real(8), allocatable:: emadt2(:)
+     real(8), allocatable:: emaver(:)
+     complex(8), allocatable:: c2(:), c3(:)
+     integer :: i, index_in, index
+     integer :: iflag, ierr
+     logical :: ttsde
+
+     iflag = 0
+     IF( PRESENT( fromscra ) ) THEN
+       IF( fromscra ) iflag = 1
+     END IF
+     IF( PRESENT( restart ) ) THEN
+       IF( restart ) iflag = 2
+     END IF
+
+     !
+     ! ...  set verlet variables 
+     !
+     verl1 = 2.0d0 * fccc
+     verl2 = 1.0d0 - verl1
+     verl3 = 1.0d0 * fccc
+
+     allocate(c2(ngw))
+     allocate(c3(ngw))
+     ALLOCATE( emadt2( ngw ) )
+     ALLOCATE( emaver( ngw ) )
+
+     ccc    = fccc * dt2bye
+     emadt2 = dt2bye * ema0bg
+     emaver = emadt2 * verl3
+
+     IF( iflag == 0 ) THEN
+       ttsde  = tsde
+     ELSE IF( iflag == 1 ) THEN
+       ttsde = .TRUE.
+     ELSE IF( iflag == 2 ) THEN
+       ttsde = .FALSE.
+     END IF
+
+     if( lwf ) then
+        !
+        call ef_potential( nfi, rhos, bec, deeq, betae, c0, cm, emadt2, emaver, verl1, verl2, c2, c3 )
+        !
+     else
+
+        IF (.NOT.(ALLOCATED(tg_c2))) ALLOCATE(tg_c2((NOGRP+1)*ngw))
+        IF (.NOT.(ALLOCATED(tg_c3))) ALLOCATE(tg_c3((NOGRP+1)*ngw))
+
+        !---------------------------------------------------------------
+        !This loop is parallelized accross the eigenstates as well as
+        !in the FFT, similar to rhoofr
+        !---------------------------------------------------------------
+
+        !------------------------------------
+        !The potential in rhos
+        !is distributed accros all processors
+        !We need to redistribute it so that
+        !it is completely contained in the
+        !processors of an orbital TASK-GROUP
+        !------------------------------------
+        !
+        recv_cnt(1)   = dffts%npp(NOLIST(1)+1)*nr1sx*nr2sx
+        recv_displ(1) = 0
+        DO i = 2, NOGRP
+           recv_cnt(i) = dffts%npp(NOLIST(i)+1)*nr1sx*nr2sx
+           recv_displ(i) = recv_displ(i-1) + recv_cnt(i)
+        ENDDO
+        IF (.NOT.ALLOCATED(tg_rhos)) ALLOCATE(tg_rhos( (NOGRP+1)*nr1sx*nr2sx*maxval(dffts%npp),nspin))
+
+        tg_c3(:) = 0D0
+        tg_c3(:) = 0D0
+        tg_rhos(:,:) = 0D0
+
+        DO i = 1, nspin
+           CALL MPI_Allgatherv(rhos(1,i), dffts%npp(me_image+1)*nr1sx*nr2sx, MPI_DOUBLE_PRECISION, &
+                tg_rhos(1,i), recv_cnt, recv_displ, MPI_DOUBLE_PRECISION, ME_OGRP, IERR)
+        ENDDO
+
+        do i = 1, n, 2*NOGRP ! 2*NOGRP eigenvalues are treated at each iteration
+
+           !----------------------------------------------------------------
+           !The input coefficients to dforce cover eigenstates i:i+2*NOGRP-1
+           !Thus, in dforce the dummy arguments for c0(1,i,1,1) and
+           !c0(1,i+1,1,1) hold coefficients for eigenstates i,i+2*NOGRP-2,2
+           !and i+1,i+2*NOGRP...for example if NOGRP is 4 then we would have
+           !1-3-5-7 and 2-4-6-8
+           !----------------------------------------------------------------
+
+           call dforce( bec, betae, i, c0(1,i), c0(1,i+1), tg_c2, tg_c3, tg_rhos)
+
+           !-------------------------------------------------------
+           !C. Bekas: This is not implemented yet! I need to see it
+           !-------------------------------------------------------
+
+           if( tefield ) then
+             CALL errore( ' runcp_uspp ', ' electric field on BGL not implemented yet ', 1 )
+           end if
+
+           IF( iflag == 2 ) THEN
+             DO index = 1, 2 * NOGRP, 2
+                cm(:,i+index-1) = c0(:,i+index-1)
+                cm(:,i+index) = c0(:,i+index)
+             ENDDO
+           END IF
+
+           index_in = 1
+           DO index = 1, 2*NOGRP, 2
+              IF (tsde) THEN
+                 CALL my_wave_steepest( cm(:, i+index-1 ), c0(:, i+index-1 ), emaver, tg_c2, ngw, index_in )
+                 CALL my_wave_steepest( cm(:, i+index), c0(:, i+index), emaver, tg_c3, ngw, index_in )
+              ELSE
+                 CALL my_wave_verlet( cm(:, i+index-1 ), c0(:, i+index-1 ), &
+                      verl1, verl2, emaver, tg_c2, ngw, index_in )
+                 CALL my_wave_verlet( cm(:, i+index), c0(:, i+index ), &
+                      verl1, verl2, emaver, tg_c3, ngw, index_in )
+
+              ENDIF
+              if ( gstart == 2 ) then
+                 cm(1,  i+index-1)=cmplx(real(cm(1,  i+index-1)),0.0)
+                 cm(1,i+index)=cmplx(real(cm(1,i+index)),0.0)
+              end if
+             index_in = index_in+1
+
+           ENDDO ! End loop accross 2*NOGRP current eigenstates
+
+        end do ! End loop accross eigenstates
+
+     end if
+
+     DEALLOCATE( emadt2 )
+     DEALLOCATE( emaver )
+     deallocate(c2)
+     deallocate(c3)
+
+
+   END SUBROUTINE runcp_uspp_bgl
+!
+!=----------------------------------------------------------------------------=!
+!=----------------------------------------------------------------------------=!
+
     SUBROUTINE runcp_uspp_force_pairing( nfi, fccc, ccc, ema0bg, dt2bye, rhos, bec, c0, cm, &
                                          intermed, fromscra, restart )
   !
       USE wave_base, ONLY: wave_steepest, wave_verlet
-      USE control_flags, ONLY: tbuff, lwf, tsde
+      USE control_flags, ONLY: lwf, tsde
   !   use uspp, only : nhsa=> nkb, betae => vkb, rhovan => becsum, deeq
       USE uspp, ONLY : deeq, betae => vkb
       USE reciprocal_vectors, ONLY : gstart
diff --git a/CPV/smcp.f90 b/CPV/smcp.f90
index 5e547ea67..e0e19ab6d 100644
--- a/CPV/smcp.f90
+++ b/CPV/smcp.f90
@@ -14,7 +14,7 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out )
   ! ... main subroutine for SMD by Yosuke Kanai
   !
   USE kinds,                    ONLY : DP
-  USE control_flags,            ONLY : iprint, isave, thdyn, tpre, tbuff, &
+  USE control_flags,            ONLY : iprint, isave, thdyn, tpre, &
                                        iprsta, tfor, tvlocw,      &
                                        taurdr, tprnfor, tsdc, ndr, ndw, nbeg,  &
                                        nomore, tsde, tortho, tnosee, tnosep,   &
@@ -228,12 +228,6 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out )
   smpm = smd_p -1
   !
   !
-  IF(tbuff) THEN
-     WRITE(stdout,*) "TBUFF set to .FALSE., Option not implemented with SMD"
-     tbuff = .FALSE.
-  ENDIF
-  !
-  !
   !     Opening files
   !
   !
@@ -600,10 +594,6 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out )
         CALL runcp_uspp( nfi, fccc(sm_k), ccc(sm_k), ema0bg, dt2bye, rhos, &
              rep_el(sm_k)%bec, rep_el(sm_k)%cm, rep_el(sm_k)%c0, fromscra = .TRUE. )
         !
-        !     buffer for wavefunctions is unit 21
-        !
-        IF(tbuff) REWIND 21
-        !
         !     nlfq needs deeq calculated in newd
         !
         IF ( tfor .OR. tprnfor ) CALL nlfq(rep_el(sm_k)%cm,eigr, &
@@ -924,10 +914,6 @@ SUBROUTINE smdmain( tau, fion_out, etot_out, nat_out )
         CALL runcp_uspp( nfi, fccc(sm_k), ccc(sm_k), ema0bg, dt2bye, rhos, &
              rep_el(sm_k)%bec, rep_el(sm_k)%c0, rep_el(sm_k)%cm )
         !
-        !     buffer for wavefunctions is unit 21
-        !
-        IF(tbuff) REWIND 21
-        !
         !----------------------------------------------------------------------
         !                 contribution to fion due to lambda
         !----------------------------------------------------------------------
diff --git a/CPV/stress.f90 b/CPV/stress.f90
index 53a60800f..7b31f9f0c 100644
--- a/CPV/stress.f90
+++ b/CPV/stress.f90
@@ -560,9 +560,7 @@
 !  ----------------------------------------------
 
       SUBROUTINE pseudo_stress( deps, epseu, gagb, sfac, dvps, rhoeg, omega )
-
-      !  
-
+      !
       USE ions_base,          ONLY: nsp
       USE reciprocal_vectors, ONLY: gstart
       USE gvecs,              ONLY: ngs
@@ -578,7 +576,7 @@
 
       INTEGER     :: ig,k,is, ispin
       COMPLEX(DP) :: rhets, depst(6)
-      !  
+      !
       depst = (0.d0,0.d0)
 
       DO is = 1, nsp
@@ -606,7 +604,6 @@
       RETURN
       END SUBROUTINE pseudo_stress
 
-
 !  ----------------------------------------------
 
 
@@ -752,23 +749,30 @@
 
       SUBROUTINE stress_debug(dekin, deht, dexc, desr, deps, denl, htm1)
         USE io_global, ONLY: stdout
+        USE mp_global, ONLY: intra_image_comm
+        USE mp,        ONLY: mp_sum
         REAL(DP) :: dekin(:), deht(:), dexc(:), desr(:), deps(:), denl(:)
         REAL(DP) :: detot( 6 ), htm1(3,3)
         REAL(DP) :: detmp(3,3)
+
         INTEGER :: k, i, j
+
         detot = dekin + deht + dexc + desr + deps + denl
-        WRITE( stdout,106) detot
-        WRITE( stdout,100) dekin
-        WRITE( stdout,101) deht
-        WRITE( stdout,102) dexc
-        WRITE( stdout,103) desr
-        WRITE( stdout,104) deps
-        WRITE( stdout,105) denl
+
+        DO k=1,6
+          detmp(alpha(k),beta(k)) = detot(k)
+          detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
+        END DO
+        CALL mp_sum( detmp, intra_image_comm )
+        detmp = MATMUL( detmp(:,:), htm1(:,:) )
+        WRITE( stdout,*) "derivative of e(tot)"
+        WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
 
         DO k=1,6
           detmp(alpha(k),beta(k)) = dekin(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(kin)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
@@ -777,6 +781,7 @@
           detmp(alpha(k),beta(k)) = deht(k) + desr(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(electrostatic)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
@@ -785,6 +790,7 @@
           detmp(alpha(k),beta(k)) = deht(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(h)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
@@ -793,6 +799,7 @@
           detmp(alpha(k),beta(k)) = desr(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(sr)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
@@ -801,6 +808,7 @@
           detmp(alpha(k),beta(k)) = deps(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(ps)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
@@ -809,6 +817,7 @@
           detmp(alpha(k),beta(k)) = denl(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(nl)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
@@ -817,20 +826,15 @@
           detmp(alpha(k),beta(k)) = dexc(k)
           detmp(beta(k),alpha(k)) = detmp(alpha(k),beta(k))
         END DO
+        CALL mp_sum( detmp, intra_image_comm )
         detmp = MATMUL( detmp(:,:), htm1(:,:) )
         WRITE( stdout,*) "derivative of e(xc)"
         WRITE( stdout,5555) ((detmp(i,j),j=1,3),i=1,3)
+
 5555  format(1x,f12.5,1x,f12.5,1x,f12.5/                                &
      &       1x,f12.5,1x,f12.5,1x,f12.5/                                &
      &       1x,f12.5,1x,f12.5,1x,f12.5//)
 
-  100   FORMAT(' dekin :',6F12.4)
-  101   FORMAT(' deht  :',6F12.4)
-  102   FORMAT(' dexc  :',6F12.4)
-  103   FORMAT(' desr  :',6F12.4)
-  104   FORMAT(' deps  :',6F12.4)
-  105   FORMAT(' denl  :',6F12.4)
-  106   FORMAT(' detot :',6F12.4)
       RETURN
       END SUBROUTINE stress_debug
 
diff --git a/CPV/wf.f90 b/CPV/wf.f90
index e83b44c34..b7f39499a 100644
--- a/CPV/wf.f90
+++ b/CPV/wf.f90
@@ -3572,7 +3572,7 @@ SUBROUTINE dforce_field( bec, deeq, betae, i, c, ca, df, da, v, v1 )
   ! ...                                e^-ig.r_i < beta_i,j | c_n > }
   !
   USE kinds,                  ONLY : DP
-  USE control_flags,          ONLY : iprint, tbuff
+  USE control_flags,          ONLY : iprint
   USE gvecs,                  ONLY : nms, nps
   USE gvecw,                  ONLY : ngw
   USE cvan,                   ONLY : ish
@@ -3612,25 +3612,15 @@ SUBROUTINE dforce_field( bec, deeq, betae, i, c, ca, df, da, v, v1 )
      END DO
   ENDIF
   !
-  IF (.NOT.tbuff) THEN
-     !
-     psi( 1:nnrsx ) = 0.D0
-     DO ig=1,ngw
-        psi(nms(ig))=CONJG(c(ig)-CI*ca(ig))
-        psi(nps(ig))=c(ig)+CI*ca(ig)
-     END DO
-     !
-     CALL invfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
-     !     
-  ELSE
-     !
-     !     read psi from buffer 21
-     !
-     READ(21,iostat=ios) psi
-     IF(ios.NE.0) CALL errore( 'dforce', 'error in reading unit 21', ios )
-     !
-  ENDIF
-  ! 
+  !
+  psi( 1:nnrsx ) = 0.D0
+  DO ig=1,ngw
+     psi(nms(ig))=CONJG(c(ig)-CI*ca(ig))
+     psi(nps(ig))=c(ig)+CI*ca(ig)
+  END DO
+  !
+  CALL invfft('Wave',psi,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
+  !     
   iss1=ispin(i)
   !
   ! the following avoids a potential out-of-bounds error
@@ -3866,7 +3856,7 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, &
   !
   USE constants,              ONLY : bohr_radius_angs
   USE kinds,                  ONLY : DP
-  USE control_flags,          ONLY : iprint, tbuff, iprsta, thdyn, tpre, trhor
+  USE control_flags,          ONLY : iprint, iprsta, thdyn, tpre, trhor
   USE ions_base,              ONLY : nax, nat, nsp, na
   USE cell_base,              ONLY : a1, a2, a3
   USE recvecs_indexes,        ONLY : np, nM
@@ -3889,6 +3879,7 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, &
   USE uspp_param,             ONLY : nh, nhm
   USE uspp,                   ONLY : nkb
   USE fft_module,             ONLY : fwfft, invfft
+  USE charge_density,         ONLY : checkrho
   USE mp,                     ONLY : mp_sum
   !
   IMPLICIT NONE
@@ -4015,9 +4006,6 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, &
      !
      CALL invfft('Wave',psis,nr1s,nr2s,nr3s,nr1sx,nr2sx,nr3sx)
      !
-     !     wavefunctions in unit 21
-     !
-     IF(tbuff) WRITE(21,iostat=ios) psis
      !            iss1=ispin(i)
      iss1=1
      sa1=f(i)/omega
@@ -4033,17 +4021,8 @@ SUBROUTINE rhoiofr( nfi, c, irb, eigrb, bec, &
         rhos(ir,iss2)=rhos(ir,iss2) + sa2*(AIMAG(psis(ir)))**2
      END DO
      !
-     !       buffer 21
-     !     
-     IF(tbuff) THEN
-        IF(ios.NE.0) CALL errore                                  &
-             &              (' rhoofr',' error in writing unit 21',ios)
-     ENDIF
-     !
      !         end do
      !
-     IF(tbuff) REWIND 21
-     !
      !     smooth charge in g-space is put into rhog(ig)
      !
      IF(nspin.EQ.1)THEN
diff --git a/Modules/control_flags.f90 b/Modules/control_flags.f90
index 14a39ab8e..cca68177c 100644
--- a/Modules/control_flags.f90
+++ b/Modules/control_flags.f90
@@ -53,7 +53,7 @@ MODULE control_flags
             tscreen, gamma_only, force_pairing, tchi2
   !
   PUBLIC :: fix_dependencies, check_flags
-  PUBLIC :: tbuff, tvlocw, trhor, thdyn, iprsta
+  PUBLIC :: tvlocw, trhor, thdyn, iprsta
   PUBLIC :: twfcollect, printwfc
   PUBLIC :: tuspp
   PUBLIC :: program_name
@@ -66,7 +66,6 @@ MODULE control_flags
                                           !                 'CPVC'  cp
                                           !                 'SMCP'  smcp
   !
-  LOGICAL :: tbuff     = .FALSE. ! save wfc on        unit 21 (only cp, never used)
   LOGICAL :: tvlocw    = .FALSE. ! write potential to unit 46 (only cp, seldom used)
   LOGICAL :: trhor     = .FALSE. ! read rho from      unit 47 (only cp, seldom used)
   !
diff --git a/Modules/read_upf.f90 b/Modules/read_upf.f90
index 2d15ffed2..dc9875803 100644
--- a/Modules/read_upf.f90
+++ b/Modules/read_upf.f90
@@ -418,6 +418,11 @@ subroutine read_pseudo_nl (upf, iunps)
               call scan_begin (iunps, "QFCOEF", .false.)  
               read (iunps,*,err=106,end=106) &
                         ( ( upf%qfcoef(i,lp,nb,mb), i=1,upf%nqf ), lp=1,upf%nqlc )
+              do i = 1, upf%nqf
+                 do lp = 1, upf%nqlc
+                    upf%qfcoef(i,lp,mb,nb) = upf%qfcoef(i,lp,nb,mb)
+                 end do
+              end do
               call scan_end (iunps, "QFCOEF")  
            end if
 
diff --git a/PW/stres_loc.f90 b/PW/stres_loc.f90
index fe85cb05d..a60590c1e 100644
--- a/PW/stres_loc.f90
+++ b/PW/stres_loc.f90
@@ -61,7 +61,8 @@ subroutine stres_loc (sigmaloc)
              * vloc (igtongl (ng), nt) * fact
      enddo
   enddo
-  !      WRITE( stdout,*) ' evloc ', evloc, evloc*omega
+  !
+  !      WRITE( 6,*) ' evloc ', evloc, evloc*omega   ! DEBUG
   !
   do nt = 1, ntyp
      ! dvloc contains dV_loc(G)/dG