diff --git a/HP/src/hp_allocate_q.f90 b/HP/src/hp_allocate_q.f90 index 8eafbc13c..2614e34bf 100644 --- a/HP/src/hp_allocate_q.f90 +++ b/HP/src/hp_allocate_q.f90 @@ -16,7 +16,7 @@ subroutine hp_allocate_q USE ions_base, ONLY : nat USE wvfct, ONLY : nbnd, npwx USE lsda_mod, ONLY : nspin - USE noncollin_module, ONLY : npol, nspin_mag + USE noncollin_module, ONLY : npol, nspin_mag, domag USE fft_base, ONLY : dfftp USE wavefunctions, ONLY : evc USE becmod, ONLY : allocate_bec_type @@ -27,6 +27,9 @@ subroutine hp_allocate_q USE control_lr, ONLY : lgamma USE ldaU, ONLY : Hubbard_lmax, nwfcU USE ldaU_lr, ONLY : swfcatomk, swfcatomkpq + USE qpoint_aux, ONLY : becpt + USE hp_nc_mag_aux, ONLY : deeq_nc_save + USE uspp_param, ONLY : nhm ! IMPLICIT NONE INTEGER :: ik @@ -42,6 +45,19 @@ subroutine hp_allocate_q ALLOCATE (dvpsi(npwx*npol,nbnd)) ALLOCATE (dpsi(npwx*npol,nbnd)) ALLOCATE (dmuxc(dfftp%nnr,nspin_mag,nspin_mag)) + ! -------------- LUCA --------------------------- + ! NB: based on subroutine allocate_phq + IF (noncolin.AND.domag) THEN + ALLOCATE (becpt(nksq)) + DO ik=1,nksq + CALL allocate_bec_type ( nkb, nbnd, becpt(ik) ) + ENDDO + IF (okvan) THEN + ALLOCATE (deeq_nc_save( nhm, nhm, nat, nspin, 2)) + ENDIF + ENDIF + ! ----------------------------------------------- + ! ! IF (okvan) THEN ALLOCATE (eigqts(nat)) @@ -51,11 +67,12 @@ subroutine hp_allocate_q ENDDO ENDIF ! - ALLOCATE (swfcatomk(npwx,nwfcU)) + ! ---------- LUCA (added npol) ------------------- + ALLOCATE (swfcatomk(npwx*npol,nwfcU)) IF (lgamma) THEN swfcatomkpq => swfcatomk ELSE - ALLOCATE (swfcatomkpq(npwx,nwfcU)) + ALLOCATE (swfcatomkpq(npwx*npol,nwfcU)) ENDIF ! RETURN diff --git a/HP/src/hp_calc_chi.f90 b/HP/src/hp_calc_chi.f90 index 03760f88c..2f3b11fb5 100644 --- a/HP/src/hp_calc_chi.f90 +++ b/HP/src/hp_calc_chi.f90 @@ -19,6 +19,7 @@ SUBROUTINE hp_calc_chi USE io_global, ONLY : stdout USE ldaU_hp, ONLY : nqsh, nath_sc, nah_pert, chi0, chi, & dnsscf_tot, dns0_tot + USE noncollin_module, ONLY : npol ! IMPLICIT NONE ! @@ -75,9 +76,10 @@ SUBROUTINE calcchi (dns_, chi_, name_) ! ! Divide by rytoev -> conversion of units from Ry to eV ! - DO is = 1, nspin + ! ---------- LUCA (added npol) -------------------- + DO is = 1, nspin/npol DO m = 1, 2 * Hubbard_l(nt) + 1 - trace_dns(is) = trace_dns(is) + dns_(m,m,is,na,icell)/rytoev + trace_dns(is) = trace_dns(is) + dns_(m,m,is**npol,na,icell)/rytoev ENDDO trace_dns_tot = trace_dns_tot + trace_dns(is) ENDDO diff --git a/HP/src/hp_dealloc_q.f90 b/HP/src/hp_dealloc_q.f90 index 74b4c3074..0993d9fce 100644 --- a/HP/src/hp_dealloc_q.f90 +++ b/HP/src/hp_dealloc_q.f90 @@ -23,6 +23,8 @@ SUBROUTINE hp_dealloc_q() USE eqv, ONLY : dmuxc, dpsi, dvpsi, evq USE control_lr, ONLY : lgamma, nbnd_occ USE ldaU_lr, ONLY : swfcatomk, swfcatomkpq + USE qpoint_aux, ONLY : ikmks, ikmkmqs, becpt + USE hp_nc_mag_aux, ONLY : deeq_nc_save ! IMPLICIT NONE INTEGER :: ik @@ -39,16 +41,33 @@ SUBROUTINE hp_dealloc_q() if (allocated(nbnd_occ)) deallocate (nbnd_occ) if (allocated(ikks)) deallocate (ikks) if (allocated(ikqs)) deallocate (ikqs) + ! ----------- LUCA ---------------- + if (allocated(ikmks)) deallocate (ikmks) + if (allocated(ikmkmqs)) deallocate (ikmkmqs) + ! -------------------------------- if (allocated(m_loc)) deallocate (m_loc) ! + if (allocated(this_pert_is_on_file)) & + & deallocate (this_pert_is_on_file) + ! IF (okvan) THEN if (allocated(eigqts)) deallocate (eigqts) + ! if (allocated(becp1)) then do ik=1,size(becp1) call deallocate_bec_type ( becp1(ik) ) enddo deallocate(becp1) endif + ! -------------- LUCA ----------------- + if (allocated(becpt)) then + do ik=1,size(becpt) + call deallocate_bec_type ( becpt(ik) ) + enddo + deallocate(becpt) + endif + if (allocated(deeq_nc_save)) deallocate(deeq_nc_save) + ! ------------------------------------- ENDIF ! ! GGA-specific arrays diff --git a/HP/src/hp_dnsq.f90 b/HP/src/hp_dnsq.f90 index bc520e10d..f0e43a8ab 100644 --- a/HP/src/hp_dnsq.f90 +++ b/HP/src/hp_dnsq.f90 @@ -45,6 +45,8 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) conv_thr_chi_best, iter_best USE ldaU_lr, ONLY : swfcatomk, swfcatomkpq USE efermi_shift, ONLY : def + USE noncollin_module, ONLY : noncolin, npol, nspin_mag, domag + USE qpoint_aux, ONLY : ikmks ! IMPLICIT NONE ! @@ -60,7 +62,8 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) ! INTEGER :: ik, ikk, ikq, i, j, k, ios, icar, & counter, nt, na, l, ih, jkb2, n, & - ihubst, ihubst1, ihubst2, m, m1, m2, ibnd, is, ldim + ihubst, ihubst1, ihubst2, m, m1, m2, ibnd, is, ldim, & + ikmk, isolv, nsolv, nrec, is1, is2, isp1, isp2 INTEGER :: npw, npwq ! number of plane waves at k and k+q COMPLEX(DP), ALLOCATABLE :: dpsi(:, :), proj1(:,:), proj2(:,:) @@ -76,7 +79,8 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) ! ios = 0 ! - ALLOCATE (dpsi(npwx,nbnd)) + ! ---------- LUCA (added npol) ---------------- + ALLOCATE (dpsi(npwx*npol,nbnd)) ALLOCATE (proj1(nbnd,nwfcU)) ALLOCATE (proj2(nbnd,nwfcU)) ALLOCATE (trace_dns_tot(nat)) @@ -100,102 +104,149 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) ENDIF ENDDO ! + ! ----------------------- LUCA ----------------- + nsolv = 1 + IF (noncolin.AND.domag) nsolv = 2 + ! ---------------------------------------------- DO ik = 1, nksq ! ikk = ikks(ik) ikq = ikqs(ik) npw = ngk(ikk) npwq = ngk(ikq) - ! - IF (lsda) current_spin = isk (ikk) - ! - ! Read unperturbed KS wfc's psi(k) - ! - IF (nksq.GT.1) CALL get_buffer (evc, lrwfc, iuwfc, ikk) - ! - ! Read atomic wfc's : S(k)*phi(k) and S(k+q)*phi(k+q) - ! - CALL get_buffer (swfcatomk, nwordwfcU, iuatswfc, ikk) - IF (.NOT.lgamma) CALL get_buffer (swfcatomkpq, nwordwfcU, iuatswfc, ikq) - ! - ! At each SCF iteration for each ik read dpsi from file - ! - CALL get_buffer (dpsi, lrdwf, iudwf, ik) - ! - ! Loop on Hubbard atoms - ! - DO na = 1, nat + ! ------------ LUCA -------------------- + DO isolv = 1, nsolv + IF (isolv == 2) THEN + ikmk = ikmks(ik) + ELSE + ikmk = ikk + ENDIF ! + IF (lsda) current_spin = isk (ikk) + ! + ! Read unperturbed KS wfc's psi(k) + ! ---------- LUCA (ikmk instead of ikk) ----------------- + IF (nksq.GT.1) CALL get_buffer (evc, lrwfc, iuwfc, ikmk) + ! + ! Read atomic wfc's : S(k)*phi(k) and S(k+q)*phi(k+q) + CALL get_buffer (swfcatomk, nwordwfcU, iuatswfc, ikk) + IF (.NOT.lgamma) CALL get_buffer (swfcatomkpq, nwordwfcU, iuatswfc, ikq) + ! + ! At each SCF iteration for each ik read dpsi from file + nrec = ik + (isolv - 1) * nksq + CALL get_buffer (dpsi, lrdwf, iudwf, nrec) + ! + ! Loop on Hubbard atoms + proj1 = (0.d0, 0.d0) + proj2 = (0.d0, 0.d0) + DO na = 1, nat nt = ityp(na) - ! IF (is_hubbard(nt)) THEN - ! - DO m = 1, 2 * Hubbard_l(nt) + 1 - ! + ldim = (2*Hubbard_l(nt) + 1) * npol + DO m = 1, ldim ihubst = offsetU(na) + m ! I m index ! ! proj1(ibnd, ihubst) = < S(k)*phi(k) | psi(k) > ! proj2(ibnd, ihubst) = < S(k+q)*phi(k+q) | dpsi(k+q) > ! FIXME: use ZGEMM instead of dot_product - ! DO ibnd = 1, nbnd_occ(ikk) - proj1(ibnd, ihubst) = DOT_PRODUCT( swfcatomk(1:npw,ihubst), evc(1:npw,ibnd) ) - proj2(ibnd, ihubst) = DOT_PRODUCT( swfcatomkpq(1:npwq,ihubst), dpsi(1:npwq,ibnd) ) + ! --------------------- LUCA ---------------------- +! IF (noncolin) then + proj1(ibnd, ihubst) = DOT_PRODUCT( swfcatomk(1:npwx*npol,ihubst), & + evc(1:npwx*npol,ibnd) ) + proj2(ibnd, ihubst) = DOT_PRODUCT( swfcatomkpq(1:npwx*npol,ihubst), & + dpsi(1:npwx*npol,ibnd) ) +! ELSE +! proj1(ibnd, ihubst) = DOT_PRODUCT( swfcatomk(1:npw,ihubst), & +! evc(1:npw,ibnd) ) +! proj2(ibnd, ihubst) = DOT_PRODUCT( swfcatomkpq(1:npwq,ihubst), & +! dpsi(1:npwq,ibnd) ) + ENDIF + !------------------------------------------- ENDDO - ! ENDDO - ! ENDIF ! ENDDO ! + CALL mp_sum(proj1, intra_pool_comm) CALL mp_sum(proj2, intra_pool_comm) + ! - DO na = 1, nat - ! - nt = ityp(na) - ! - IF (is_hubbard(nt)) THEN - ! - DO m1 = 1, 2 * Hubbard_l(nt) + 1 - ! - ihubst1 = offsetU(na) + m1 - ! - DO m2 = m1, 2 * Hubbard_l(nt) + 1 - ! - ihubst2 = offsetU(na) + m2 - ! - DO ibnd = 1, nbnd_occ(ikk) - ! - dnsq(m1, m2, current_spin, na) = dnsq(m1, m2, current_spin, na) + & - wk(ikk) * ( CONJG(proj1(ibnd,ihubst1)) * proj2(ibnd,ihubst2) + & - CONJG(proj1(ibnd,ihubst2)) * proj2(ibnd,ihubst1) ) - ! - ! Correction for metals at q=0 - ! - IF (lmetq0) THEN + IF (noncolin) then + ! --------------------- LUCA ---------------------------- + ! NB: put the following in a subroutine + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + ldim = 2*Hubbard_l(nt)+1 + DO is1 = 1, npol + DO is2 = 1, npol + is = npol*(is1-1) + is2 + isp1 = is1 + (is2 - is1)*(isolv - 1) + isp2 = is2 + (is1 - is2)*(isolv - 1) + DO m1 = 1, ldim + DO m2 = 1, ldim + ihubst2 = offsetU(na) + m2 + (m1 - m2)*(isolv - 1) + ihubst1 = offsetU(na) + m1 + (m2 - m1)*(isolv - 1) + DO ibnd = 1, nbnd_occ(ikk) + dnsq(m1,m2,is,na) = dnsq(m1,m2,is,na) + & + wk(ikk) * CONJG(proj1(ibnd,ihubst1+ldim*(isp1-1)) ) * & + proj2(ibnd,ihubst2+ldim*(isp2-1)) + ! + ! to be added the correction for metals at q=0 + IF (lmetq0.and.isolv==1) then + ! + ! wdelta: smeared delta function at the Fermi level + wdelta = w0gauss ( (ef-et(ibnd,ikk)) / degauss, ngauss) / degauss + weight = wk(ikk) + w1 = weight * wdelta + ! + dnsq(m1, m2, is, na) = dnsq(m1, m2, is, na) + & + w1 * def(1) * CONJG(proj1(ibnd,ihubst1+ldim*(is1-1))) & + * proj1(ibnd,ihubst2+ldim*(is2-1)) + ENDIF + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO + ! -------------------------------------------------------- + ELSE + DO na = 1, nat + nt = ityp(na) + IF (is_hubbard(nt)) THEN + DO m1 = 1, 2 * Hubbard_l(nt) + 1 + ihubst1 = offsetU(na) + m1 + DO m2 = m1, 2 * Hubbard_l(nt) + 1 + ihubst2 = offsetU(na) + m2 + DO ibnd = 1, nbnd_occ(ikk) + dnsq(m1, m2, current_spin, na) = dnsq(m1, m2, current_spin, na) + & + wk(ikk) * ( CONJG(proj1(ibnd,ihubst1)) * proj2(ibnd,ihubst2) + & + CONJG(proj1(ibnd,ihubst2)) * proj2(ibnd,ihubst1) ) ! - ! wdelta: smeared delta function at the Fermi level - ! - wdelta = w0gauss ( (ef-et(ibnd,ikk)) / degauss, ngauss) / degauss - weight = wk(ikk) - w1 = weight * wdelta - ! - dnsq(m1, m2, current_spin, na) = dnsq(m1, m2, current_spin, na) + & - w1 * def(1) * CONJG(proj1(ibnd,ihubst1)) * proj1(ibnd,ihubst2) - ! - ENDIF - ! - ENDDO - ! - ENDDO - ! + ! Correction for metals at q=0 + IF (lmetq0) THEN + ! + ! wdelta: smeared delta function at the Fermi level + wdelta = w0gauss ( (ef-et(ibnd,ikk)) / degauss, ngauss) / degauss + weight = wk(ikk) + w1 = weight * wdelta + ! + dnsq(m1, m2, current_spin, na) = dnsq(m1, m2, current_spin, na) + & + w1 * def(1) * CONJG(proj1(ibnd,ihubst1)) * proj1(ibnd,ihubst2) + ! + ENDIF + ENDDO + ENDDO + ENDDO + ENDIF ENDDO - ! - ENDIF - ! - ENDDO + ENDIF + ENDDO ! isolv ! ENDDO ! ik ! @@ -205,11 +256,15 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) ! ! dnsq is symmetric: filling the m1 m2 lower triangular part ! - DO m1 = 2, ldim - DO m2 = 1, m1-1 - dnsq(m1,m2,:,:) = dnsq(m2,m1,:,:) + ! --------------- LUCA ------------- + IF (.not.noncolin) then + ldim = 2 * Hubbard_lmax + 1 + DO m1 = 2, ldim + DO m2 = 1, m1-1 + dnsq(m1,m2,:,:) = dnsq(m2,m1,:,:) + ENDDO ENDDO - ENDDO + ENDIF ! ! If nspin=1, k-point's weight wk is normalized to 2 el/band in the whole BZ, ! but we are interested in dnsq of one spin component @@ -221,11 +276,12 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) DO na = 1, nat nt = ityp(na) IF (is_hubbard(nt)) THEN + ldim = 2*Hubbard_l(nt)+1 DO is = 1, nspin WRITE(stdout,'(5x,a,1x,i2,2x,a,1x,i2)') ' Hubbard atom', na, 'spin', is - DO m1 = 1, 2*Hubbard_l(nt)+1 + DO m1 = 1, ldim WRITE(stdout,'(3x,10(f15.10,1x))') (DBLE(dnsq(m1,m2,is,na)), & - m2=1,2*Hubbard_l(nt)+1) + m2=1,ldim) ENDDO ENDDO ENDIF @@ -241,11 +297,12 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) DO na = 1, nat nt = ityp(na) IF (is_hubbard(nt)) THEN + ldim = 2*Hubbard_l(nt)+1 DO is = 1, nspin WRITE(stdout,'(5x,a,1x,i2,2x,a,1x,i2)') ' Hubbard atom', na, 'spin', is - DO m1 = 1, 2*Hubbard_l(nt)+1 + DO m1 = 1, ldim WRITE(stdout,'(3x,10(f15.10,1x))') (DBLE(dnsq(m1,m2,is,na)), & - m2=1,2*Hubbard_l(nt)+1) + m2=1,ldim) ENDDO ENDDO ENDIF @@ -266,13 +323,15 @@ SUBROUTINE hp_dnsq (lmetq0, iter, conv_root, dnsq) nt = ityp(na) ! IF (is_hubbard(nt)) THEN + ldim = 2*Hubbard_l(nt)+1 ! ! Divide by rytoev -> conversion of units from Ry to eV ! trace_dns(:) = 0.d0 - DO is = 1, nspin - DO m = 1, 2 * Hubbard_l(nt) + 1 - trace_dns(is) = trace_dns(is) + dnsq(m,m,is,na)/rytoev + DO is = 1, nspin/npol + DO m = 1, ldim + ! ------------ LUCA (added npol) ------------------- + trace_dns(is) = trace_dns(is) + dnsq(m,m,is**npol,na)/rytoev ENDDO trace_dns_tot(na) = trace_dns_tot(na) + trace_dns(is) ENDDO diff --git a/PW/src/electrons.f90 b/PW/src/electrons.f90 index 88b13d70c..b585dc8a0 100644 --- a/PW/src/electrons.f90 +++ b/PW/src/electrons.f90 @@ -691,7 +691,15 @@ SUBROUTINE electrons_scf ( printout, exxen ) ! IF ( iverbosity > 0 .OR. first ) THEN IF (lda_plus_u_kind.EQ.0) THEN - CALL write_ns() + ! --- LUCA -------- + IF (noncolin) THEN + CALL write_ns_nc() + ELSE + ! ----------------- + CALL write_ns() + ! --- LUCA -------- + ENDIF + ! ----------------- ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN CALL write_ns_nc() @@ -710,6 +718,10 @@ SUBROUTINE electrons_scf ( printout, exxen ) ! IF (hub_pot_fix) THEN IF (lda_plus_u_kind.EQ.0) THEN + ! ---- LUCA ------------ + IF (noncolin) CALL errore('electrons_scf', & + & 'hub_pot_fix is not implemented for (lda_plus_u_kind=0 .AND. noncolin)',1) + ! --------------------- rho%ns = rhoin%ns ! back to input values IF (lhb) rho%nsb = rhoin%nsb ELSEIF (lda_plus_u_kind.EQ.1) THEN @@ -722,9 +734,17 @@ SUBROUTINE electrons_scf ( printout, exxen ) ! IF ( first .AND. starting_pot == 'atomic' ) THEN IF (lda_plus_u_kind.EQ.0) THEN - CALL ns_adj() - rhoin%ns = rho%ns - IF (lhb) rhoin%nsb = rho%nsb + CALL ns_adj() + ! --- LUCA -------------- + IF (noncolin) THEN + rhoin%ns_nc = rho%ns_nc + ELSE + ! ----------------- + rhoin%ns = rho%ns + IF (lhb) rhoin%nsb = rho%nsb + ! ------LUCA ----- + ENDIF + ! --------------- ELSEIF (lda_plus_u_kind.EQ.1) THEN CALL ns_adj() IF (noncolin) THEN @@ -739,8 +759,16 @@ SUBROUTINE electrons_scf ( printout, exxen ) IF ( iter <= niter_with_fixed_ns ) THEN WRITE( stdout, '(/,5X,"RESET ns to initial values (iter <= mixing_fixed_ns)",/)') IF (lda_plus_u_kind.EQ.0) THEN - rho%ns = rhoin%ns - IF (lhb) rhoin%nsb = rho%nsb + ! --- LUCA -------------- + IF (noncolin) THEN + rho%ns_nc = rhoin%ns_nc + ELSE + ! ----------------- + rho%ns = rhoin%ns + IF (lhb) rhoin%nsb = rho%nsb + ! ------LUCA ----- + ENDIF + ! --------------- ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN rho%ns_nc = rhoin%ns_nc @@ -962,7 +990,15 @@ SUBROUTINE electrons_scf ( printout, exxen ) ! ! Write the occupation matrices IF (lda_plus_u_kind == 0) THEN - CALL write_ns() + ! --- LUCA -------- + IF (noncolin) THEN + CALL write_ns_nc() + ELSE + ! ----------------- + CALL write_ns() + ! --- LUCA -------- + ENDIF + ! ----------------- ELSEIF (lda_plus_u_kind == 1) THEN IF (noncolin) THEN CALL write_ns_nc() @@ -1247,10 +1283,19 @@ SUBROUTINE electrons_scf ( printout, exxen ) ! IF (lda_plus_u .AND. (.NOT.hub_pot_fix)) THEN IF (lda_plus_u_kind.EQ.0) THEN - delta_e_hub = - SUM( rho%ns(:,:,:,:)*v%ns(:,:,:,:) ) - IF (lhb) delta_e_hub = delta_e_hub - SUM(rho%nsb(:,:,:,:)*v%nsb(:,:,:,:)) - IF (nspin==1) delta_e_hub = 2.d0 * delta_e_hub - delta_e = delta_e + delta_e_hub + ! --- LUCA ----------- + IF (noncolin) THEN + delta_e_hub = - SUM( rho%ns_nc(:,:,:,:)*v%ns_nc(:,:,:,:) ) + delta_e = delta_e + delta_e_hub + ELSE + ! -------------------- + delta_e_hub = - SUM( rho%ns(:,:,:,:)*v%ns(:,:,:,:) ) + IF (lhb) delta_e_hub = delta_e_hub - SUM(rho%nsb(:,:,:,:)*v%nsb(:,:,:,:)) + IF (nspin==1) delta_e_hub = 2.d0 * delta_e_hub + delta_e = delta_e + delta_e_hub + ! --- LUCA ---------- + ENDIF + ! ------------------- ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN delta_e_hub = - SUM( rho%ns_nc(:,:,:,:)*v%ns_nc(:,:,:,:) ) @@ -1334,11 +1379,20 @@ SUBROUTINE electrons_scf ( printout, exxen ) ! IF (lda_plus_u .AND. (.NOT.hub_pot_fix)) THEN IF (lda_plus_u_kind.EQ.0) THEN - delta_escf_hub = -SUM((rhoin%ns(:,:,:,:)-rho%ns(:,:,:,:))*v%ns(:,:,:,:)) - IF (lhb) delta_escf_hub = delta_escf_hub - & - SUM((rhoin%nsb(:,:,:,:)-rho%nsb(:,:,:,:))*v%nsb(:,:,:,:)) - IF ( nspin==1 ) delta_escf_hub = 2.d0 * delta_escf_hub - delta_escf = delta_escf + delta_escf_hub + ! --- LUCA -------------------- + IF (noncolin) THEN + delta_escf_hub = -SUM((rhoin%ns_nc(:,:,:,:)-rho%ns_nc(:,:,:,:))*v%ns_nc(:,:,:,:)) + delta_escf = delta_escf + delta_escf_hub + ELSE + ! ----------------------------- + delta_escf_hub = -SUM((rhoin%ns(:,:,:,:)-rho%ns(:,:,:,:))*v%ns(:,:,:,:)) + IF (lhb) delta_escf_hub = delta_escf_hub - & + SUM((rhoin%nsb(:,:,:,:)-rho%nsb(:,:,:,:))*v%nsb(:,:,:,:)) + IF ( nspin==1 ) delta_escf_hub = 2.d0 * delta_escf_hub + delta_escf = delta_escf + delta_escf_hub + ! --- LUCA -------------------- + ENDIF + ! ----------------------------- ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN delta_escf_hub = -SUM((rhoin%ns_nc(:,:,:,:)-rho%ns_nc(:,:,:,:))*v%ns_nc(:,:,:,:)) diff --git a/PW/src/force_hub.f90 b/PW/src/force_hub.f90 index 592c4fe0e..12d1ae9c4 100644 --- a/PW/src/force_hub.f90 +++ b/PW/src/force_hub.f90 @@ -45,12 +45,13 @@ SUBROUTINE force_hub( forceh ) USE io_files, ONLY : nwordwfc, iunwfc, nwordwfcU USE buffers, ONLY : get_buffer USE mp_bands, ONLY : use_bgrp_in_hpsi - USE noncollin_module, ONLY : noncolin + USE noncollin_module, ONLY : noncolin, npol USE force_mod, ONLY : eigenval, eigenvect, overlap_inv USE wavefunctions_gpum, ONLY : using_evc USE becmod_subs_gpum, ONLY : using_becp_auto USE uspp_init, ONLY : init_us_2 USE constants, ONLY : eps16 + USE mp_bands, ONLY : intra_bgrp_comm ! IMPLICIT NONE ! @@ -62,9 +63,12 @@ SUBROUTINE force_hub( forceh ) TYPE(bec_type) :: proj ! proj(nwfcU,nbnd) COMPLEX(DP), ALLOCATABLE :: spsi(:,:) REAL(DP), ALLOCATABLE :: dns(:,:,:,:), dnsb(:,:,:,:) + ! ---------- LUCA ------------------------------------- + COMPLEX (DP), ALLOCATABLE :: dns_nc(:,:,:,:) + ! ----------------------------------------------------- COMPLEX (DP), ALLOCATABLE :: dnsg(:,:,:,:,:) ! dns(ldim,ldim,nspin,nat) ! the derivative of the atomic occupations - INTEGER :: npw, alpha, na, nt, is, m1, m2, ipol, ldim, ik, ijkb0 + INTEGER :: npw, alpha, na, nt, is, is2, m1, m2, ipol, ldim, ik, ijkb0 INTEGER :: na1, na2, equiv_na2, nt1, nt2, ldim1, ldim2, viz INTEGER :: nb_s, nb_e, mykey, ldimb LOGICAL :: lhubb @@ -78,7 +82,7 @@ SUBROUTINE force_hub( forceh ) CALL errore("force_hub", & " forces for this Hubbard_projectors type not implemented",1) ! - IF (noncolin) CALL errore ("forceh","Noncollinear case is not supported",1) + ! IF (noncolin) CALL errore ("forceh","Noncollinear case is not supported",1) ! IF (ANY(Hubbard_J(:,:)>eps16)) CALL errore("force_hub", & " forces in the DFT+U+J scheme are not implemented", 1 ) @@ -87,11 +91,21 @@ SUBROUTINE force_hub( forceh ) ! DFT+U lhubb = .FALSE. ldim = 2*Hubbard_lmax + 1 - ALLOCATE ( dns(ldim, ldim, nspin, nat) ) + ! ---------------------- LUCA ----------------------- + IF (noncolin) then + ALLOCATE ( dns_nc(ldim, ldim, nspin, nat) ) + ELSE + ALLOCATE ( dns(ldim, ldim, nspin, nat) ) + ENDIF + ! -------------------------------------------------- DO nt = 1, ntyp IF (is_hubbard_back(nt)) lhubb = .TRUE. ENDDO + IF (lhubb) THEN + ! ------------ LUCA ------------------ + IF (noncolin) CALL errore ("force_hub","Noncollinear and background are not supported",1) + ! ------------------------------ ldimb = ldmx_b ALLOCATE ( dnsb(ldimb, ldimb, nspin, nat) ) ENDIF @@ -101,16 +115,23 @@ SUBROUTINE force_hub( forceh ) ALLOCATE( dnsg(ldim, ldim, max_num_neighbors, nat, nspin) ) ENDIF ! - ALLOCATE (spsi(npwx,nbnd)) - ALLOCATE (wfcatom(npwx,natomwfc)) + ! -------------------- LUCA (added npol to the plane-wave size)----------------------- + ALLOCATE (spsi(npwx*npol,nbnd)) + ALLOCATE (wfcatom(npwx*npol,natomwfc)) IF (Hubbard_projectors.EQ."ortho-atomic") THEN - ALLOCATE (swfcatom(npwx,natomwfc)) + ALLOCATE (swfcatom(npwx*npol,natomwfc)) + ! ---------------------------------------------------------------- ALLOCATE (eigenval(natomwfc)) ALLOCATE (eigenvect(natomwfc,natomwfc)) ALLOCATE (overlap_inv(natomwfc,natomwfc)) ENDIF ! - CALL allocate_bec_type( nwfcU, nbnd, proj ) + ! ----------------------- LUCA ------------------- + IF (noncolin) THEN + ALLOCATE (proj%k (nwfcU, nbnd)) + ELSE + CALL allocate_bec_type( nwfcU, nbnd, proj ) + ENDIF ! ! poor-man parallelization over bands ! - if nproc_pool=1 : nb_s=1, nb_e=nbnd, mykey=0 @@ -148,7 +169,15 @@ SUBROUTINE force_hub( forceh ) CALL orthoUwfc_k (ik, .TRUE.) ! ! proj= - CALL calbec( npw, wfcU, spsi, proj ) + ! ----------------------- LUCA ------------------------------------ + IF (noncolin) THEN + CALL ZGEMM ('C', 'N', nwfcU, nbnd, npwx*npol, (1.0_DP, 0.0_DP), wfcU, & + npwx*npol, spsi, npwx*npol, (0.0_DP, 0.0_DP), proj%k, nwfcU) + CALL mp_sum( proj%k( :, 1:nbnd ), intra_bgrp_comm ) + ELSE + CALL calbec( npw, wfcU(1:npwx,:), spsi, proj ) + ENDIF + ! ---------------------------------------------------------------- ! ! now we need the first derivative of proj with respect to tau(alpha,ipol) ! @@ -160,27 +189,52 @@ SUBROUTINE force_hub( forceh ) ! DO ipol = 1, 3 ! forces are calculated for coordinate ipol ... ! - IF ( gamma_only ) THEN - CALL dndtau_gamma ( ldim, proj%r, spsi, alpha, ijkb0, ipol, ik, & - nb_s, nb_e, mykey, 1, dns ) - ELSE - CALL dndtau_k ( ldim, proj%k, spsi, alpha, ijkb0, ipol, ik, & - nb_s, nb_e, mykey, 1, dns ) - ENDIF + ! ---------------------------- LUCA -------------------------------- ! !omp parallel do default(shared) private(na,nt,m1,m2,is) - DO na = 1, nat - nt = ityp(na) - IF ( is_hubbard(nt) ) THEN - DO is = 1, nspin - DO m2 = 1, 2*Hubbard_l(nt)+1 - DO m1 = 1, 2*Hubbard_l(nt)+1 - forceh(ipol,alpha) = forceh(ipol,alpha) - & - v%ns(m2,m1,is,na) * dns(m1,m2,is,na) + IF (noncolin) THEN + CALL dndtau_k_nc ( ldim, proj%k, spsi, alpha, ijkb0, ipol, ik, & + nb_s, nb_e, mykey, 1, dns_nc ) + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + DO is = 1, npol + DO is2 = 1, npol + DO m2 = 1, 2*Hubbard_l(nt)+1 + DO m1 = 1, 2*Hubbard_l(nt)+1 + forceh(ipol,alpha) = forceh(ipol,alpha) - & + dble( v%ns_nc(m2, m1, npol*(is-1)+is2, na) * & + dns_nc(m1, m2, npol*(is2-1)+is, na) ) + ENDDO + ENDDO ENDDO ENDDO - ENDDO - ENDIF - ENDDO + ENDIF + ENDDO + ELSE + IF ( gamma_only ) THEN + CALL dndtau_gamma ( ldim, proj%r, spsi, alpha, ijkb0, ipol, ik, & + nb_s, nb_e, mykey, 1, dns ) + ELSE + CALL dndtau_k ( ldim, proj%k, spsi, alpha, ijkb0, ipol, ik, & + nb_s, nb_e, mykey, 1, dns ) + ENDIF + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + DO is = 1, nspin + DO m2 = 1, 2*Hubbard_l(nt)+1 + DO m1 = 1, 2*Hubbard_l(nt)+1 + forceh(ipol,alpha) = forceh(ipol,alpha) - & + v%ns(m2,m1,is,na) * dns(m1,m2,is,na) + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO + ENDIF + ! + ! ------------------------------------------------------------------- +! !omp parallel do default(shared) private(na,nt,m1,m2,is) ! !omp end parallel do ! IF (lhubb) THEN @@ -260,7 +314,13 @@ SUBROUTINE force_hub( forceh ) CALL deallocate_bec_type( proj ) ! IF (lda_plus_u_kind.EQ.0) THEN + ! ------------------- LUCA ------------------------- + IF (noncolin) THEN + DEALLOCATE(dns_nc) + ELSE DEALLOCATE(dns) + ENDIF + ! -------------------------------------------------- IF (ALLOCATED(dnsb)) DEALLOCATE(dnsb) ELSEIF (lda_plus_u_kind.EQ.2) THEN DEALLOCATE(dnsg) @@ -473,6 +533,172 @@ SUBROUTINE dndtau_k ( ldim, proj, spsi, alpha, jkb0, ipol, ik, nb_s, & ! END SUBROUTINE dndtau_k ! +! ------------------------------- LUCA ----------------------------------------- +SUBROUTINE dndtau_k_nc ( ldim, proj, spsi, alpha, jkb0, ipol, ik, nb_s, & + nb_e, mykey, lpuk, dns_nc ) + !--------------------------------------------------------------------------- + !! This routine computes the derivative of the ns with respect to the ionic + !! displacement \(u(\text{alpha,ipol})\) used to obtain the Hubbard + !! contribution to the atomic forces. + ! + USE kinds, ONLY : DP + USE ions_base, ONLY : nat, ityp + USE lsda_mod, ONLY : nspin, current_spin + USE ldaU, ONLY : is_hubbard, Hubbard_l, nwfcU, offsetU, & + is_hubbard_back, offsetU_back, ldim_u, & + offsetU_back1, ldim_back, Hubbard_l_back, & + backall, U_projection, wfcU + USE noncollin_module, ONLY : npol + USE wvfct, ONLY : nbnd, npwx, wg + USE mp_pools, ONLY : intra_pool_comm, me_pool, nproc_pool + USE mp_bands, ONLY : intra_bgrp_comm + USE mp, ONLY : mp_sum + USE wavefunctions, ONLY : evc + USE uspp, ONLY : okvan + USE force_mod, ONLY : doverlap_inv + USE basis, ONLY : natomwfc + ! + IMPLICIT NONE + ! + ! I/O variables + ! + INTEGER, INTENT(IN) :: ldim + !! ldim = 2*Hubbard_lmax+1 + COMPLEX (DP), INTENT(IN) :: proj(nwfcU,nbnd) + !! projection + ! ----------- LUCA (added npol to the size of spsi) ----------------------- + COMPLEX(DP), INTENT(IN) :: spsi(npwx*npol,nbnd) + !! \(S|\ \text{evc}\rangle\) + INTEGER, INTENT(IN) :: alpha + !! the displaced atom index + INTEGER, INTENT(IN) :: jkb0 + !! positions of beta functions for atom alpha + INTEGER, INTENT(IN) :: ipol + !! the component of displacement + INTEGER, INTENT(IN) :: ik + !! k-point index + INTEGER, INTENT(IN) :: nb_s + !! starting band number (for band parallelization) + INTEGER, INTENT(IN) :: nb_e + !! ending band number (for band parallelization) + INTEGER, INTENT(IN) :: mykey + !! If each band appears more than once + !! compute its contribution only once (i.e. when mykey=0) + INTEGER, INTENT(IN) :: lpuk + !! index to control the standard (lpuk=1) or + !! background (lpuk=2) contribution to the force + COMPLEX(DP), INTENT(OUT) :: dns_nc(ldim,ldim,nspin,nat) + !! the derivative of the atomic occupations + ! + ! ... local variables + ! + INTEGER :: ibnd, is1, is2, na, nt, m1, m2, off1, off2, m11, m22, ldim1, i, j + REAL(DP) :: psum + COMPLEX(DP), ALLOCATABLE :: dproj(:,:), dproj_us(:,:) + ! + CALL start_clock( 'dndtau' ) + ! + ALLOCATE ( dproj(nwfcU,nb_s:nb_e) ) + ! + ! Compute the derivative of occupation matrices (the quantities dns_nc(m1,m2,i)) + ! of the atomic orbitals. They are complex quantities as well as dns_nc(m1,m2,i). + ! + dns_nc(:,:,:,:) = 0.d0 + ! + ! Band parallelization. If each band appears more than once + ! compute its contribution only once (i.e. when mykey=0) + ! + IF ( mykey /= 0 ) GO TO 10 + ! + ! Compute the USPP contribution to dproj: + ! <\phi^{at}_{I,m1}|dS/du(alpha,ipol)|\psi_{k,v,s}> + ! + IF (okvan) THEN + ALLOCATE ( dproj_us(nwfcU,nb_s:nb_e) ) + CALL matrix_element_of_dSdtau (alpha, ipol, ik, jkb0, & + nwfcU, wfcU, nbnd, evc, dproj_us, nb_s, nb_e, mykey) + ENDIF + ! + ! In the 'ortho-atomic' case calculate d[(O^{-1/2})^T] + ! + IF (U_projection.EQ."ortho-atomic") THEN + ALLOCATE ( doverlap_inv(natomwfc,natomwfc) ) + CALL calc_doverlap_inv (alpha, ipol, ik, jkb0) + ENDIF + ! +! !omp parallel do default(shared) private(na,nt,m1,m2,ibnd) + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + ! + ! Compute the second contribution to dproj due to the derivative of + ! (ortho-)atomic orbitals + CALL dprojdtau_k ( spsi, alpha, na, jkb0, ipol, ik, nb_s, nb_e, mykey, dproj ) + IF (okvan) dproj = dproj + dproj_us + ! + ldim1 = 2*Hubbard_l(nt)+1 + DO is1 = 1, npol + DO is2 = 1, npol + i = npol*(is1-1) + is2 + DO m1 = 1, ldim1 + DO m2 = 1, ldim1 + DO ibnd = nb_s, nb_e + dns_nc(m1,m2,i,na) = dns_nc(m1,m2,i,na) + & + wg(ibnd,ik) * dcmplx(CONJG(dproj(offsetU(na)+m2+ldim1*(is2-1),ibnd) )* & + proj(offsetU(na)+m1+ldim1*(is1-1),ibnd) + & + CONJG(proj(offsetU(na)+m2+ldim1*(is2-1),ibnd) )* & + dproj(offsetU(na)+m1+ldim1*(is1-1),ibnd)) + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO +! !omp end parallel do + ! +10 DEALLOCATE( dproj ) + IF (ALLOCATED(doverlap_inv)) DEALLOCATE( doverlap_inv ) + IF (okvan) DEALLOCATE (dproj_us) + ! + CALL mp_sum( dns_nc, intra_pool_comm ) + ! + ! Impose hermiticity of dns_nc{m1,m2,i} + ! +! !omp parallel do default(shared) private(na,is,m1,m2) + DO na = 1, nat + nt = ityp (na) + IF ( is_hubbard(nt) ) THEN + DO is1 = 1, npol + DO is2 = 1, npol + i = npol*(is1-1) + is2 + j = is1 + npol*(is2-1) + ldim1 = 2*Hubbard_l(nt)+1 + DO m1 = 1, ldim1 + DO m2 = 1, ldim1 + psum = ABS( dns_nc(m1,m2,i,na) - CONJG(dns_nc(m2,m1,j,na)) ) + IF (psum.GT.1.d-10) THEN + ! print*, na, m1, m2, is1, is2 + ! print*, dns_nc(m1,m2,i,na) + ! print*, dns_nc(m2,m1,j,na) + CALL errore( 'dns_nc', 'non hermitean matrix', 1 ) + ELSE + dns_nc(m2,m1,j,na) = CONJG( dns_nc(m1,m2,i,na) ) + ENDIF + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO +! !omp end parallel do + ! + CALL stop_clock( 'dndtau' ) + ! + RETURN + ! +END SUBROUTINE dndtau_k_nc +!------------------------------------------------------------------------------- !----------------------------------------------------------------------- SUBROUTINE dndtau_gamma ( ldim, rproj, spsi, alpha, jkb0, ipol, ik, & nb_s, nb_e, mykey, lpuk, dns ) @@ -1012,6 +1238,7 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr is_hubbard_back, Hubbard_l2, offsetU_back, & offsetU_back1, ldim_u, backall, lda_plus_u_kind, & Hubbard_projectors, oatwfc + USE noncollin_module, ONLY : noncolin, npol USE wvfct, ONLY : nbnd, npwx, wg USE uspp, ONLY : nkb, vkb, okvan USE uspp_param, ONLY : nh @@ -1023,12 +1250,14 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr USE basis, ONLY : natomwfc, wfcatom, swfcatom USE force_mod, ONLY : eigenval, eigenvect, overlap_inv, doverlap_inv USE wavefunctions_gpum, ONLY : using_evc + USE ldaU, ONLY : is_hubbard, Hubbard_l, offsetU ! IMPLICIT NONE ! ! I/O variables ! - COMPLEX(DP), INTENT(IN) :: spsi(npwx,nbnd) + ! ---------------- LUCA (added npol to the size of spsi) --------------------- + COMPLEX(DP), INTENT(IN) :: spsi(npwx*npol,nbnd) !! \(S\ |\text{evc}\rangle\) INTEGER, INTENT(IN) :: alpha !! the displaced atom @@ -1080,7 +1309,7 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr ! localized nature of atomic wfc). ! Note: parallelization here is over plane waves, not over bands! ! - ALLOCATE ( dwfc(npwx,ldim) ) + ALLOCATE ( dwfc(npwx*npol,ldim*npol) ) dwfc(:,:) = (0.d0, 0.d0) ! ! DFT+U: In the expression of dwfc we don't need (k+G) but just G; k always @@ -1107,15 +1336,27 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr - ldim_std - 2*Hubbard_l2(nt) - 1 ENDIF dwfc(ig,m1) = (0.d0,-1.d0) * gvec * wfcU(ig,offpm) + ! --------------------- LUCA ------------------- + IF (noncolin) dwfc(ig+npwx,m1+ldim) = (0.d0,-1.d0) * gvec * wfcU(ig+npwx,offpm+ldim) + ! ------------------------------------------- ENDDO ! ENDDO !!omp end parallel do ! - ALLOCATE ( dproj0(ldim,nbnd) ) - CALL ZGEMM('C','N',ldim, nbnd, npw, (1.d0,0.d0), & - dwfc, npwx, spsi, npwx, (0.d0,0.d0), & - dproj0, ldim) + ! ------------------- LUCA ------------------------- + IF (noncolin) THEN + ALLOCATE ( dproj0(ldim*npol,nbnd) ) + CALL ZGEMM('C','N',ldim*npol, nbnd, npwx*npol, (1.d0,0.d0), & + dwfc, npwx*npol, spsi, npwx*npol, (0.d0,0.d0), & + dproj0, ldim*npol) + ELSE + ALLOCATE ( dproj0(ldim,nbnd) ) + CALL ZGEMM('C','N',ldim, nbnd, npw, (1.d0,0.d0), & + dwfc, npwx, spsi, npwx, (0.d0,0.d0), & + dproj0, ldim) + ENDIF + ! -------------------------------------------------- ! DEALLOCATE ( dwfc ) CALL mp_sum( dproj0, intra_bgrp_comm ) @@ -1133,6 +1374,9 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr - ldim_std - 2*Hubbard_l2(nt) - 1 ENDIF dproj(offpm, :) = dproj0(m1, nb_s:nb_e) + ! ----------------- LUCA -------------------------------- + IF (noncolin) dproj(offpm+ldim, :) = dproj0(m1+ldim, nb_s:nb_e) + ! ------------------------------------------------------- ENDDO ENDIF DEALLOCATE ( dproj0 ) @@ -1152,7 +1396,8 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr IF (is_hubbard_back(nt)) CALL errore("dprojdtau_k", & " Forces with background and ortho-atomic are not supported",1) ! - ALLOCATE (dwfc(npwx,ldim)) + ! ------------- LUCA (added npol) ------------------------------- + ALLOCATE (dwfc(npwx*npol,ldim*npol)) dwfc(:,:) = (0.d0, 0.d0) ! ! Determine how many atomic wafefunctions there are for atom 'alpha' @@ -1172,10 +1417,14 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr offpm = oatwfc(na) ! offset DO ig = 1, npw gvec = (g(ipol,igk_k(ig,ik)) + xk(ipol,ik)) * tpiba - DO m1 = 1, ldim + DO m1 = 1, ldim*npol DO m2 = m_start, m_end dwfc(ig,m1) = dwfc(ig,m1) + (0.d0,-1.d0) * gvec * & overlap_inv(offpm+m1,m2) * wfcatom(ig,m2) + ! ---------- LUCA ----------------- + ! + IF (noncolin) dwfc(ig+npwx,m1) = dwfc(ig+npwx,m1) + (0.d0,-1.d0) * gvec * & + overlap_inv(offpm+m1,m2) * wfcatom(ig+npwx,m2) ENDDO ENDDO ENDDO @@ -1192,17 +1441,17 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr ! dwfc(ig,m1) = dwfc(ig,m1) + wfcatom(ig,m2) * doverlap_inv(m2,offpm+m1) ! where m1=1,ldim; m2=1,natomwfc; ig=1,npw ! - CALL ZGEMM('N','N', npw, ldim, natomwfc, (1.d0,0.d0), & - wfcatom, npwx, doverlap_inv(:,offpm+1:offpm+ldim), & - natomwfc, (1.d0,0.d0), dwfc, npwx) + CALL ZGEMM('N','N', npwx*npol, ldim*npol, natomwfc, (1.d0,0.d0), & + wfcatom, npwx*npol, doverlap_inv(:,offpm+1:offpm+ldim*npol), & + natomwfc, (1.d0,0.d0), dwfc, npwx*npol) ! ! 3. Final step: compute dproj0 = ! - ALLOCATE ( dproj0(ldim,nbnd) ) + ALLOCATE ( dproj0(ldim*npol,nbnd) ) dproj0(:,:) = (0.0d0, 0.0d0) - CALL ZGEMM('C','N',ldim, nbnd, npw, (1.d0,0.d0), & - dwfc, npwx, spsi, npwx, (0.d0,0.d0), & - dproj0, ldim) + CALL ZGEMM('C','N',ldim*npol, nbnd, npwx*npol, (1.d0,0.d0), & + dwfc, npwx*npol, spsi, npwx*npol, (0.d0,0.d0), & + dproj0, ldim*npol) CALL mp_sum( dproj0, intra_bgrp_comm ) ! ! Copy to dproj results for the bands treated by this processor @@ -1211,6 +1460,9 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr IF (mykey==0) THEN DO m1 = 1, ldim dproj( offpm+m1, :) = dproj0(m1, nb_s:nb_e) + ! ----------------- LUCA -------------------------------- + IF (noncolin) dproj(offpm+m1+ldim, :) = dproj0(m1+ldim, nb_s:nb_e) + ! ------------------------------------------------------- ENDDO ENDIF ! @@ -1218,7 +1470,7 @@ SUBROUTINE dprojdtau_k( spsi, alpha, na, ijkb0, ipol, ik, nb_s, nb_e, mykey, dpr DEALLOCATE (dwfc) ! ENDIF - ! + ! CALL stop_clock('dprojdtau') ! RETURN @@ -1235,6 +1487,7 @@ SUBROUTINE natomwfc_per_atom(alpha, m_start, m_end) USE uspp_param, ONLY : upf USE ldaU, ONLY : Hubbard_l USE io_global, ONLY : stdout + USE noncollin_module, ONLY : noncolin, npol ! IMPLICIT NONE ! @@ -1255,7 +1508,15 @@ SUBROUTINE natomwfc_per_atom(alpha, m_start, m_end) DO nb = 1, upf(nt)%nwfc IF (upf(nt)%oc(nb) >= 0.d0) THEN l = upf(nt)%lchi(nb) - counter = counter + 2*l + 1 + ! ----------------- LUCA ----------------------- + IF (noncolin.and.(.not.upf(nt)%has_so)) THEN + counter = counter + 2*(2*l + 1) + ELSEIF (upf(nt)%has_so.and.(l==0)) THEN + counter = counter + 2 + ELSE + counter = counter + 2*l + 1 + ENDIF + ! ----------------------------------- ENDIF ENDDO IF (na == alpha) THEN @@ -1280,6 +1541,8 @@ SUBROUTINE calc_doverlap_inv (alpha, ipol, ik, ijkb0) ! This routine computes the derivative of O^{-1/2} transposed ! USE kinds, ONLY : DP + USE wvfct, ONLY : npwx + USE noncollin_module, ONLY : noncolin USE cell_base, ONLY : tpiba USE gvect, ONLY : g USE uspp, ONLY : okvan @@ -1330,13 +1593,19 @@ SUBROUTINE calc_doverlap_inv (alpha, ipol, ik, ijkb0) DO m2 = 1, natomwfc doverlap(m1,m2) = doverlap(m1,m2) + (0.d0,1.d0) * gvec & * CONJG(wfcatom(ig,m1)) * swfcatom(ig,m2) - ENDDO + ! --------- LUCA ------------------- + IF (noncolin) doverlap(m1,m2) = doverlap(m1,m2) + (0.d0,1.d0) * gvec & + * CONJG(wfcatom(ig+npwx,m1)) * swfcatom(ig+npwx,m2) + ENDDO ENDDO ! Calculate < phi_I | S | dphi_J/d\tau(alpha,ipol) > DO m1 = 1, natomwfc DO m2 = m_start, m_end doverlap(m1,m2) = doverlap(m1,m2) + (0.d0,-1.d0) * gvec & * CONJG(swfcatom(ig,m1)) * wfcatom(ig,m2) + ! --------------------- LUCA ------------------- + IF (noncolin) doverlap(m1,m2) = doverlap(m1,m2) + (0.d0,-1.d0) * gvec & + * CONJG(swfcatom(ig+npwx,m1)) * wfcatom(ig+npwx,m2) ENDDO ENDDO ENDDO @@ -1348,8 +1617,9 @@ SUBROUTINE calc_doverlap_inv (alpha, ipol, ik, ijkb0) ! IF (okvan) THEN ALLOCATE(doverlap_us(natomwfc,natomwfc)) + ! -------------------- LUCA (added flag = .true.)-------------------- CALL matrix_element_of_dSdtau (alpha, ipol, ik, ijkb0, & - natomwfc, wfcatom, natomwfc, wfcatom, doverlap_us, 1, natomwfc, 0) + natomwfc, wfcatom, natomwfc, wfcatom, doverlap_us, 1, natomwfc, 0, .true.) doverlap = doverlap + doverlap_us DEALLOCATE(doverlap_us) ENDIF @@ -1368,22 +1638,26 @@ SUBROUTINE calc_doverlap_inv (alpha, ipol, ik, ijkb0) ! END SUBROUTINE calc_doverlap_inv ! -SUBROUTINE matrix_element_of_dSdtau (alpha, ipol, ik, ijkb0, lA, A, lB, B, A_dS_B, lB_s, lB_e, mykey) +SUBROUTINE matrix_element_of_dSdtau (alpha, ipol, ik, ijkb0, lA, A, lB, B, A_dS_B, lB_s, lB_e, mykey, flag) ! ! This routine computes the matrix element < A | dS/d\tau(alpha,ipol) | B > ! Written by I. Timrov (2020) ! + USE ldaU, ONLY : is_hubbard, hubbard_l, offsetU USE kinds, ONLY : DP USE ions_base, ONLY : nat, ntyp => nsp, ityp USE cell_base, ONLY : tpiba USE gvect, ONLY : g USE wvfct, ONLY : npwx, wg - USE uspp, ONLY : nkb, vkb, qq_at, okvan - USE uspp_param, ONLY : nh + USE uspp, ONLY : nkb, vkb, qq_at, qq_so, okvan + USE uspp_param, ONLY : nh, upf USE klist, ONLY : igk_k, ngk USE wavefunctions, ONLY : evc USE becmod, ONLY : calbec USE wavefunctions_gpum, ONLY : using_evc + USE noncollin_module, ONLY : noncolin, npol, lspinorb + USE mp, ONLY : mp_sum + USE mp_bands, ONLY : intra_bgrp_comm ! IMPLICIT NONE ! @@ -1396,14 +1670,18 @@ SUBROUTINE matrix_element_of_dSdtau (alpha, ipol, ik, ijkb0, lA, A, lB, B, A_dS_ INTEGER, INTENT(IN) :: lA, lB, lB_s, lB_e ! There is a possibility to parallelize over lB, ! where lB_s (start) and lB_e (end) - COMPLEX(DP), INTENT(IN) :: A(npwx,lA) - COMPLEX(DP), INTENT(IN) :: B(npwx,lB) + ! ------------------ LUCA (added npol to the size of A and B) ---------------------- + LOGICAL, OPTIONAL :: flag ! noncollinear: controlling whether + ! calculating + ! or (= .true.) + COMPLEX(DP), INTENT(IN) :: A(npwx*npol,lA) + COMPLEX(DP), INTENT(IN) :: B(npwx*npol,lB) COMPLEX(DP), INTENT(OUT) :: A_dS_B(lA,lB_s:lB_e) INTEGER, INTENT(IN) :: mykey ! ! Local variables ! - INTEGER :: npw, nt, ih, jh, ig, iA, iB + INTEGER :: npw, nt, ih, jh, ig, iA, iB, mU, mD, nt1, ldim, na REAL(DP) :: gvec COMPLEX (DP), ALLOCATABLE :: Adbeta(:,:), Abeta(:,:), & dbetaB(:,:), betaB(:,:), & @@ -1416,66 +1694,166 @@ SUBROUTINE matrix_element_of_dSdtau (alpha, ipol, ik, ijkb0, lA, A, lB, B, A_dS_ nt = ityp(alpha) npw = ngk(ik) ! - ALLOCATE ( Adbeta(lA,nh(nt)) ) - ALLOCATE ( Abeta(lA,nh(nt)) ) - ALLOCATE ( dbetaB(nh(nt),lB) ) - ALLOCATE ( betaB(nh(nt),lB) ) - ALLOCATE ( qq(nh(nt),nh(nt)) ) + ! ------------------- LUCA (added npol) ------------------------------- + ALLOCATE ( Adbeta(lA,npol*nh(nt)) ) + ALLOCATE ( Abeta(lA,npol*nh(nt)) ) + ALLOCATE ( dbetaB(npol*nh(nt),lB) ) + ALLOCATE ( betaB(npol*nh(nt),lB) ) + ALLOCATE ( qq(nh(nt)*npol,nh(nt)*npol) ) ! - qq(:,:) = CMPLX(qq_at(1:nh(nt),1:nh(nt),alpha), 0.0d0, kind=DP) + ! ----------- LUCA ---------------------------- + IF (noncolin) THEN + IF ( upf(nt)%has_so ) THEN + qq(1:nh(nt),1:nh(nt)) = qq_so(:,:,1,nt) + qq(1:nh(nt),1+nh(nt):nh(nt)*npol) = qq_so(:,:,2,nt) + qq(1+nh(nt):nh(nt)*npol,1:nh(nt)) = qq_so(:,:,3,nt) + qq(1+nh(nt):nh(nt)*npol,1+nh(nt):nh(nt)*npol) = qq_so(:,:,4,nt) + ELSE + qq(1:nh(nt),1:nh(nt)) = qq_at(1:nh(nt),1:nh(nt),alpha) + qq(1:nh(nt),1+nh(nt):nh(nt)*npol) = (0.0,0.0) + qq(1+nh(nt):nh(nt)*npol,1:nh(nt)) = (0.0,0.0) + qq(1+nh(nt):nh(nt)*npol,1+nh(nt):nh(nt)*npol) = qq_at(1:nh(nt),1:nh(nt),alpha) + ENDIF + ELSE + qq(:,:) = CMPLX(qq_at(1:nh(nt),1:nh(nt),alpha), 0.0d0, kind=DP) + ENDIF ! ! aux is used as a workspace - ALLOCATE ( aux(npwx,nh(nt)) ) + ! ------------------ LUCA (added npol to the size of aux) ---------------------- + ALLOCATE ( aux(npwx*npol,nh(nt)*npol) ) aux(:,:) = (0.0d0, 0.0d0) ! - ! !!omp parallel do default(shared) private(ig,ih) ! Beta function DO ih = 1, nh(nt) DO ig = 1, npw aux(ig,ih) = vkb(ig,ijkb0+ih) + ! ------------------ LUCA ------------------ + IF (noncolin) aux(ig+npwx,ih+nh(nt)) = vkb(ig,ijkb0+ih) + ! ------------------------------------------ ENDDO ENDDO !!omp end parallel do ! - ! Calculate Abeta = - CALL calbec ( npw, A, aux, Abeta ) - ! - ! Calculate betaB = - CALL calbec( npw, aux, B, betaB ) - ! + ! ------------------------------- LUCA --------------------------- + IF (noncolin) THEN + ! Calculate Abeta = + ! Abeta(:,1 : nh(nt)) = spin up + ! Abeta(:,1+nh(nt): nh(nt)*npol) = spin down + ! + Abeta=(0.0,0.0) + CALL ZGEMM ('C', 'N', lA, nh(nt)*npol, npwx*npol, (1.0_DP, 0.0_DP), A, & + npwx*npol, aux, npwx*npol, (0.0_DP, 0.0_DP), Abeta, lA) + CALL mp_sum( Abeta(:, 1:nh(nt)*npol) , intra_bgrp_comm ) + ! + ! Calculate betaB = + ! betaB(:,1 : nh(nt)) = spin up + ! betaB(:,1+nh(nt): nh(nt)*npol) = spin down + ! + betaB=(0.0,0.0) + CALL ZGEMM ('C', 'N', nh(nt)*npol, lB, npwx*npol, (1.0_DP, 0.0_DP), aux, & + npwx*npol, B, npwx*npol, (0.0_DP, 0.0_DP), betaB, nh(nt)*npol) + CALL mp_sum( betaB(:, 1:lB) , intra_bgrp_comm ) + ELSE + ! Calculate Abeta = + CALL calbec ( npw, A, aux, Abeta ) + ! + ! Calculate betaB = + CALL calbec( npw, aux, B, betaB ) + ENDIF + ! --------------------------------------------------------------------------- !!omp parallel do default(shared) private(ig,ih) ! Calculate the derivative of the beta function DO ih = 1, nh(nt) DO ig = 1, npw gvec = g(ipol,igk_k(ig,ik)) * tpiba aux(ig,ih) = (0.d0,-1.d0) * aux(ig,ih) * gvec + ! -------------------------- LUCA --------------------------- + IF (noncolin) aux(ig+npwx,ih+nh(nt)) = & + (0.d0,-1.d0) * aux(ig+npwx,ih+nh(nt)) * gvec + ! ------------------------------------------------------------------- ENDDO ENDDO !!omp end parallel do - ! - ! Calculate Adbeta = - CALL calbec ( npw, A, aux, Adbeta ) - ! - ! Calculate dbetaB = - CALL calbec ( npw, aux, B, dbetaB ) - ! + ! ------------------------------- LUCA --------------------------- + IF (noncolin) THEN + ! Calculate Adbeta = + ! (same as Abeta) + ! + Adbeta=(0.0,0.0) + CALL ZGEMM ('C', 'N', lA, nh(nt)*npol, npwx*npol, (1.0_DP, 0.0_DP), A, & + npwx*npol, aux, npwx*npol, (0.0_DP, 0.0_DP), Adbeta, lA) + CALL mp_sum( Adbeta(:, 1:nh(nt)*npol) , intra_bgrp_comm ) + ! + ! Calculate dbetaB = + ! (same as betaB) + ! + dbetaB=(0.0,0.0) + CALL ZGEMM ('C', 'N', nh(nt)*npol, lB, npwx*npol, (1.0_DP, 0.0_DP), aux, & + npwx*npol, B, npwx*npol, (0.0_DP, 0.0_DP), dbetaB, nh(nt)*npol) + CALL mp_sum( dbetaB(:, 1:lB) , intra_bgrp_comm ) + ELSE + ! Calculate Adbeta = + CALL calbec ( npw, A, aux, Adbeta ) + ! + ! Calculate dbetaB = + CALL calbec ( npw, aux, B, dbetaB ) + ENDIF + ! --------------------------------------------------------------------------- DEALLOCATE ( aux ) ! ! Here starts a parallelization over lB_s and lB_e ! - ALLOCATE ( aux(nh(nt),lB) ) + ! ------------ LUCA (added npol)------------------ + ALLOCATE ( aux(nh(nt)*npol,lB) ) + aux(:,:)=(0.0,0.0) ! - ! Calculate \sum_jh qq_at(ih,jh) * dbetaB(jh) - CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - qq, nh(nt), dbetaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & - aux(1,lB_s), nh(nt)) + ! ----------------------- LUCA ---------------------------- + IF (noncolin) THEN + ! aux(:, 1:nh(nt)) = \sum_jh qq(1,jh)*dbetaB(1,jh) + qq(2,jh)*dbetaB(2,jh) + ! aux(:, 1+nh(nt):nh(nt)*npol) = \sum_jh qq(3,jh)*dbetaB(1,jh) + qq(4,jh)*dbetaB(2,jh) + ! + ! spin up + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1, 1), nh(nt)*npol, & + dbetaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1, lB_s), nh(nt)*npol) + ! spin down + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1+nh(nt), 1), nh(nt)*npol, & + dbetaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1+nh(nt), lB_s), nh(nt)*npol) + ELSE + ! Calculate \sum_jh qq_at(ih,jh) * dbetaB(jh) + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + qq, nh(nt), dbetaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & + aux(1,lB_s), nh(nt)) + ENDIF + ! ---------------------------------------------------------- dbetaB(:,:) = aux(:,:) ! - ! Calculate \sum_jh qq_at(ih,jh) * betaB(jh) - CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - qq, nh(nt), betaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & - aux(1,lB_s), nh(nt)) + aux(:,:)=(0.0,0.0) + ! ----------------------- LUCA ---------------------------- + IF (noncolin) THEN + ! (same as dbetaB) + ! + ! spin up + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1, 1), nh(nt)*npol, & + betaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1, lB_s), nh(nt)*npol) + ! spin down + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1+nh(nt), 1), nh(nt)*npol, & + betaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1+nh(nt), lB_s), nh(nt)*npol) + ELSE + ! Calculate \sum_jh qq_at(ih,jh) * betaB(jh) + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + qq, nh(nt), betaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & + aux(1,lB_s), nh(nt)) + ENDIF + ! ---------------------------------------------------------- betaB(:,:) = aux(:,:) ! DEALLOCATE ( aux ) @@ -1485,20 +1863,61 @@ SUBROUTINE matrix_element_of_dSdtau (alpha, ipol, ik, ijkb0, lA, A, lB, B, A_dS_ ! Only A_dS_B(:,lB_s:lB_e) are calculated ! IF ( mykey == 0 ) THEN - CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - Adbeta, lA, betaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & - A_dS_B(1,lB_s), lA) - CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - Abeta, lA, dbetaB(1,lB_s), nh(nt), (1.0d0,0.0d0), & - A_dS_B(1,lB_s), lA) - ENDIF + ! ----------------------- LUCA ---------------------------- + IF (noncolin) THEN + nt1 = nh(nt) + 1 + IF ( .NOT.PRESENT(flag) ) THEN + DO na = 1, nat + IF ( is_hubbard(ityp(na)) ) THEN + ldim = 2*hubbard_l(ityp(na)) + 1 + mU = offsetU(na) + 1 + mD = mU + ldim + ! + ! spin up + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Adbeta(mU,1), lA, & + betaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + A_dS_B(mU, lB_s), lA) + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Abeta(mU,1), lA, & + dbetaB(1, lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(mU, lB_s), lA) + ! spin down + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Adbeta(mD, nt1), lA, & + betaB(nt1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + A_dS_B(mD,lB_s), lA) + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Abeta(mD, nt1), lA, & + dbetaB(nt1, lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(mD,lB_s), lA) + ENDIF + ENDDO + ELSEIF ( PRESENT(flag) ) THEN + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + Adbeta, lA, betaB(1,lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + Abeta, lA, dbetaB(1,lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + ENDIF + ELSE + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Adbeta, lA, betaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Abeta, lA, dbetaB(1,lB_s), nh(nt), (1.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + ENDIF + ENDIF + ! ---------------------------------------------------------- ! DEALLOCATE ( Abeta ) DEALLOCATE ( Adbeta ) DEALLOCATE ( dbetaB ) DEALLOCATE ( betaB ) DEALLOCATE ( qq ) - ! + ! RETURN ! END SUBROUTINE matrix_element_of_dSdtau diff --git a/PW/src/gen_at_dj.f90 b/PW/src/gen_at_dj.f90 index c5ca44c56..e76eef92a 100644 --- a/PW/src/gen_at_dj.f90 +++ b/PW/src/gen_at_dj.f90 @@ -24,12 +24,14 @@ SUBROUTINE gen_at_dj( ik, dwfcat ) USE wvfct, ONLY: npwx USE uspp_param, ONLY: upf, nwfcm USE basis, ONLY: natomwfc + USE noncollin_module, ONLY: noncolin, npol ! IMPLICIT NONE ! INTEGER, INTENT(IN) :: ik !! k-point index - COMPLEX(DP), INTENT(OUT) :: dwfcat(npwx,natomwfc) + ! ------------ LUCA (added npol to the plane-wave size) ----------- + COMPLEX(DP), INTENT(OUT) :: dwfcat(npwx*npol,natomwfc) !! the derivative of the atomic wfcs (all) ! ! ... local variables @@ -38,7 +40,7 @@ SUBROUTINE gen_at_dj( ik, dwfcat ) REAL(DP) :: arg COMPLEX(DP) :: phase, pref REAL(DP), ALLOCATABLE :: gk(:,:), q(:), ylm(:,:), djl(:,:,:) - COMPLEX(DP), ALLOCATABLE :: sk(:) + COMPLEX(DP), ALLOCATABLE :: sk(:), aux(:) ! npw = ngk(ik) ! calculate max angular momentum required in wavefunctions @@ -67,7 +69,7 @@ SUBROUTINE gen_at_dj( ik, dwfcat ) ! DEALLOCATE( q, gk ) ! - ALLOCATE( sk(npw) ) + ALLOCATE( sk(npw), aux(npw) ) ! iatw = 0 DO na=1,nat @@ -88,13 +90,23 @@ SUBROUTINE gen_at_dj( ik, dwfcat ) IF ( upf(nt)%oc(nb) >= 0.d0 ) THEN l = upf(nt)%lchi(nb) pref = (0.d0,1.d0)**l - DO m = 1,2*l+1 - lm = l*l+m - iatw = iatw+1 - DO ig=1,npw - dwfcat(ig,iatw) = djl(ig,nb,nt)*sk(ig)*ylm(ig,lm)*pref + ! --------------------- LUCA ------------------------ + IF (noncolin) THEN + IF ( upf(nt)%has_so ) THEN + CALL dj_wfc_atom( .TRUE. ) + ELSE + CALL dj_wfc_atom( .FALSE. ) + ENDIF + ELSE + DO m = 1,2*l+1 + lm = l*l+m + iatw = iatw+1 + DO ig=1,npw + dwfcat(ig,iatw) = djl(ig,nb,nt)*sk(ig)*ylm(ig,lm)*pref + ENDDO ENDDO - ENDDO + ENDIF + ! ------------------------------------------------------ ENDIF ENDDO ENDDO @@ -104,9 +116,83 @@ SUBROUTINE gen_at_dj( ik, dwfcat ) CALL errore( 'gen_at_dj', 'unexpected error', 1 ) ENDIF - DEALLOCATE( sk ) + DEALLOCATE( sk, aux ) DEALLOCATE( djl, ylm ) ! RETURN ! + ! ------------------- LUCA ------------------------- + CONTAINS + ! + SUBROUTINE dj_wfc_atom( soc ) + !--------------------------- + ! + LOGICAL :: soc + !! .TRUE. if the fully-relativistic pseudo + ! + ! ... local variables + ! + REAL(DP) :: j + REAL(DP), ALLOCATABLE :: chiaux(:) + INTEGER :: nc, ib + ! + ! ... If SOC go on only if j=l+1/2 + IF (soc) j = upf(nt)%jchi(nb) + IF (soc .AND. ABS(j-l+0.5_DP)<1.d-4 ) RETURN + ! + ALLOCATE( chiaux(npw) ) + ! + IF (soc) THEN + ! + ! ... Find the index for j=l-1/2 + ! + IF (l == 0) THEN + chiaux(:)=djl(:,nb,nt) + ELSE + DO ib=1, upf(nt)%nwfc + IF ((upf(nt)%lchi(ib) == l) .AND. & + (ABS(upf(nt)%jchi(ib)-l+0.5_DP)<1.d-4)) THEN + nc=ib + exit + ENDIF + ENDDO + ! + ! ... Average the two radial functions + ! + chiaux(:) = (djl(:,nb,nt)*(l+1.0_DP)+djl(:,nc,nt)*l)/(2.0_DP*l+1.0_DP) + ENDIF + ! + ELSE + ! + chiaux(:) = djl(:,nb,nt) + ! + ENDIF + ! + DO m = 1, 2*l+1 + lm = l**2 + m + iatw = iatw + 1 + + IF (iatw + 2*l+1 > natomwfc) CALL errore & + ('dj_wfc_atom', 'internal error: too many wfcs', 1) + DO ig = 1, npw + aux(ig) = sk(ig)*ylm(ig,lm)*chiaux(ig) + ENDDO + ! + DO ig = 1, npw + ! + dwfcat(ig,iatw) = aux(ig) + dwfcat(ig+npwx,iatw) = 0.d0 + ! + dwfcat(ig,iatw+2*l+1) = 0.d0 + dwfcat(ig+npwx,iatw+2*l+1) = aux(ig) + ! + ENDDO + ENDDO + ! + iatw = iatw + 2*l+1 + ! + DEALLOCATE( chiaux ) + ! + END SUBROUTINE dj_wfc_atom + ! END SUBROUTINE gen_at_dj diff --git a/PW/src/gen_at_dy.f90 b/PW/src/gen_at_dy.f90 index 33b171845..2a122fd9f 100644 --- a/PW/src/gen_at_dy.f90 +++ b/PW/src/gen_at_dy.f90 @@ -24,6 +24,7 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) USE wvfct, ONLY: npwx USE uspp_param, ONLY: upf, nwfcm USE basis, ONLY : natomwfc + USE noncollin_module, ONLY: noncolin, npol ! IMPLICIT NONE ! @@ -31,7 +32,8 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) !! k-point index REAL(DP), INTENT(IN) :: u(3) !! unit vector - COMPLEX(DP) :: dwfcat(npwx,natomwfc) + ! ------------ LUCA (added npol to the plane-wave size) ----------- + COMPLEX(DP) :: dwfcat(npwx*npol,natomwfc) !! atomic wfc ! ! ... local variables @@ -43,7 +45,7 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) ! REAL(DP), ALLOCATABLE :: q(:), gk(:,:), dylm(:,:), dylm_u(:,:), & chiq(:,:,:) - COMPLEX(DP), ALLOCATABLE :: sk(:) + COMPLEX(DP), ALLOCATABLE :: sk(:), aux(:) ! npw = ngk(ik) ! calculate max angular momentum required in wavefunctions @@ -52,7 +54,7 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) lmax_wfc = MAX ( lmax_wfc, MAXVAL (upf(nt)%lchi(1:upf(nt)%nwfc) ) ) enddo ! - ALLOCATE( q(npw), gk(3,npw), chiq(npw,nwfcm,ntyp) ) + ALLOCATE( q(npw), gk(3,npw), chiq(npwx,nwfcm,ntyp) ) ! dwfcat(:,:) = (0.d0,0.d0) DO ig = 1,npw @@ -79,7 +81,7 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) q(:) = SQRT(q(:))*tpiba CALL interp_atwfc ( npw, q, nwfcm, chiq ) ! - ALLOCATE( sk(npw) ) + ALLOCATE( sk(npw), aux(npw) ) ! iatw=0 DO na = 1, nat @@ -99,14 +101,23 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) IF ( upf(nt)%oc(nb) >= 0.d0 ) THEN l = upf(nt)%lchi(nb) pref = (0.d0,1.d0)**l - DO m = 1, 2*l+1 - lm = l*l+m - iatw = iatw+1 - DO ig = 1, npw - dwfcat(ig,iatw) = chiq(ig,nb,nt) * sk(ig) * & - dylm_u(ig,lm) * pref / tpiba + ! --------------------- LUCA ------------------------ + IF (noncolin) THEN + IF ( upf(nt)%has_so ) THEN + CALL dy_wfc_atom( .TRUE. ) + ELSE + CALL dy_wfc_atom( .FALSE. ) + ENDIF + ELSE + DO m = 1, 2*l+1 + lm = l*l+m + iatw = iatw+1 + DO ig = 1, npw + dwfcat(ig,iatw) = chiq(ig,nb,nt) * sk(ig) * & + dylm_u(ig,lm) * pref / tpiba + ENDDO ENDDO - ENDDO + ENDIF ENDIF ENDDO ENDDO @@ -116,10 +127,84 @@ SUBROUTINE gen_at_dy( ik, u, dwfcat ) CALL errore( 'gen_at_dy','unexpected error', 1 ) ENDIF ! - DEALLOCATE( sk ) + DEALLOCATE( sk, aux ) DEALLOCATE( dylm_u ) DEALLOCATE( q, gk, chiq ) ! RETURN + ! + CONTAINS + ! + !---------------- LUCA -------------------------- + SUBROUTINE dy_wfc_atom( soc ) + !--------------------------- + ! + LOGICAL :: soc + !! .TRUE. if the fully-relativistic pseudo + ! + ! ... local variables + ! + REAL(DP) :: j + REAL(DP), ALLOCATABLE :: chiaux(:) + INTEGER :: nc, ib + ! + ! ... If SOC go on only if j=l+1/2 + IF (soc) j = upf(nt)%jchi(nb) + IF (soc .AND. ABS(j-l+0.5_DP)<1.d-4 ) RETURN + ! + ALLOCATE( chiaux(npw) ) + ! + IF (soc) THEN + ! + ! ... Find the index for j=l-1/2 + ! + IF (l == 0) THEN + chiaux(:)=chiq(:,nb,nt) + ELSE + DO ib=1, upf(nt)%nwfc + IF ((upf(nt)%lchi(ib) == l) .AND. & + (ABS(upf(nt)%jchi(ib)-l+0.5_DP)<1.d-4)) THEN + nc=ib + exit + ENDIF + ENDDO + ! + ! ... Average the two radial functions + ! + chiaux(:) = (chiq(:,nb,nt)*(l+1.0_DP)+chiq(:,nc,nt)*l)/(2.0_DP*l+1.0_DP) + ENDIF + ! + ELSE + ! + chiaux(:) = chiq(:,nb,nt) + ! + ENDIF + ! + DO m = 1, 2*l+1 + lm = l**2 + m + iatw = iatw + 1 + + IF (iatw + 2*l+1 > natomwfc) CALL errore & + ('dy_wfc_atom', 'internal error: too many wfcs', 1) + DO ig = 1, npw + aux(ig) = sk(ig)*chiaux(ig)* dylm_u(ig,lm) / tpiba + ENDDO + ! + DO ig = 1, npw + ! + dwfcat(ig,iatw) = aux(ig) + dwfcat(ig+npwx,iatw) = 0.d0 + ! + dwfcat(ig,iatw+2*l+1) = 0.d0 + dwfcat(ig+npwx,iatw+2*l+1) = aux(ig) + ! + ENDDO + ENDDO + ! + iatw = iatw + 2*l+1 + ! + DEALLOCATE( chiaux ) + ! + END SUBROUTINE dy_wfc_atom ! END SUBROUTINE gen_at_dy diff --git a/PW/src/input.f90 b/PW/src/input.f90 index 685bdc409..f02836543 100644 --- a/PW/src/input.f90 +++ b/PW/src/input.f90 @@ -1298,6 +1298,22 @@ SUBROUTINE iosys() reserv_ = reserv reserv_back_ = reserv_back ! + ! --- LUCA ----- + ! IF ( lda_plus_u .AND. lda_plus_u_kind == 0 .AND. noncolin ) THEN + ! CALL errore('iosys', 'simplified LDA+U not implemented with & + ! &noncol. magnetism, use lda_plus_u_kind = 1', 1) + ! END IF + ! ------------------ + IF ( lda_plus_u .AND. lda_plus_u_kind == 2 ) THEN + IF ( nat > SIZE(Hubbard_V,1) ) CALL errore('input', & + & 'Too many atoms. The dimensions of Hubbard_V must be increased.',1) + ! In order to increase the dimensions of the Hubbard_V array, + ! change the parameter natx in Modules/parameters.f90 from 50 to the + ! number of atoms in your system. + END IF + lda_plus_u_ = lda_plus_u + lda_plus_u_kind_ = lda_plus_u_kind + ! ! REAL-SPACE TREATMENT ! tqr_ = tqr diff --git a/PW/src/io_rho_xml.f90 b/PW/src/io_rho_xml.f90 index fc8603bb4..ffb3e3d86 100644 --- a/PW/src/io_rho_xml.f90 +++ b/PW/src/io_rho_xml.f90 @@ -82,7 +82,15 @@ MODULE io_rho_xml OPEN ( NEWUNIT=iunocc, FILE = TRIM(dirname) // 'occup.txt', & FORM='formatted', STATUS='unknown' ) IF (lda_plus_u_kind.EQ.0) THEN - WRITE( iunocc, * , iostat = ierr) rho%ns + ! ------- LUCA ---------- + IF (noncolin) THEN + WRITE( iunocc, * , iostat = ierr) rho%ns_nc + ELSE + ! ------------------ + WRITE( iunocc, * , iostat = ierr) rho%ns + ! ------- LUCA ---------- + ENDIF + ! -------------------------- IF (hub_back) WRITE( iunocc, * , iostat = ierr) rho%nsb ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN @@ -181,7 +189,15 @@ MODULE io_rho_xml OPEN ( NEWUNIT=iunocc, FILE = TRIM(dirname) // 'occup.txt', & FORM='formatted', STATUS='old', IOSTAT=ierr ) IF (lda_plus_u_kind.EQ.0) THEN - READ( UNIT = iunocc, FMT = *, iostat = ierr ) rho%ns + ! --------- LUCA ------------ + IF (noncolin) THEN + READ( UNIT = iunocc, FMT = *, iostat = ierr ) rho%ns_nc + ELSE + ! ---------------- + READ( UNIT = iunocc, FMT = *, iostat = ierr ) rho%ns + ! --------- LUCA ------------ + ENDIF + ! ---------------- IF (hub_back) READ( UNIT = iunocc, FMT = * , iostat = ierr) rho%nsb ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN @@ -201,7 +217,15 @@ MODULE io_rho_xml CLOSE( UNIT = iunocc, STATUS = 'KEEP') ELSE IF (lda_plus_u_kind.EQ.0) THEN - rho%ns(:,:,:,:) = 0.D0 + ! ------ LUCA --------- + IF (noncolin) THEN + rho%ns_nc(:,:,:,:) = 0.D0 + ELSE + ! -------------------- + rho%ns(:,:,:,:) = 0.D0 + ! ------ LUCA --------- + ENDIF + ! -------------------- IF (hub_back) rho%nsb(:,:,:,:) = 0.D0 ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN @@ -215,7 +239,15 @@ MODULE io_rho_xml ENDIF ! IF (lda_plus_u_kind.EQ.0) THEN - CALL mp_sum(rho%ns, intra_image_comm) + ! ------ LUCA --------- + IF (noncolin) THEN + CALL mp_sum(rho%ns_nc, intra_image_comm) + ELSE + ! -------------------- + CALL mp_sum(rho%ns, intra_image_comm) + ! ------ LUCA --------- + ENDIF + ! -------------------- IF (hub_back) CALL mp_sum(rho%nsb, intra_image_comm) ELSEIF (lda_plus_u_kind.EQ.1) THEN IF (noncolin) THEN diff --git a/PW/src/ldaU.f90 b/PW/src/ldaU.f90 index e7a613d01..5377108fa 100644 --- a/PW/src/ldaU.f90 +++ b/PW/src/ldaU.f90 @@ -311,8 +311,13 @@ CONTAINS ll(l0b+1:l0b+2*Hubbard_l3(nt)+1,nt) = & Hubbard_l3(nt) ENDIF - ENDDO - ! + ENDDO + ! ---- LUCA ------- + IF (noncolin) THEN + IF ( .NOT. ALLOCATED (d_spin_ldau) ) ALLOCATE( d_spin_ldau(2,2,48) ) + CALL comp_dspinldau() + ENDIF + ! ------------------- ELSEIF ( lda_plus_u_kind == 1 ) THEN ! ! DFT+U(+J) : Liechtenstein's formulation diff --git a/PW/src/orthoatwfc.f90 b/PW/src/orthoatwfc.f90 index ae7f181fd..de2c8b15d 100644 --- a/PW/src/orthoatwfc.f90 +++ b/PW/src/orthoatwfc.f90 @@ -158,7 +158,7 @@ SUBROUTINE orthoUwfc_k (ik, lflag) USE becmod, ONLY : allocate_bec_type, deallocate_bec_type, & bec_type, becp, calbec USE control_flags, ONLY : gamma_only - USE noncollin_module, ONLY : noncolin + USE noncollin_module, ONLY : noncolin, npol USE becmod_subs_gpum, ONLY : using_becp_auto IMPLICIT NONE ! @@ -197,7 +197,8 @@ SUBROUTINE orthoUwfc_k (ik, lflag) ENDIF ! IF (Hubbard_projectors=="ortho-atomic") THEN - ALLOCATE(aux(npwx,natomwfc)) + ! LUCA -------------- added npol ------------------- + ALLOCATE(aux(npwx*npol,natomwfc)) ! Copy atomic wfcs (phi) aux(:,:) = wfcatom(:,:) ENDIF diff --git a/PW/src/potinit.f90 b/PW/src/potinit.f90 index 56c6e828e..5a4059375 100644 --- a/PW/src/potinit.f90 +++ b/PW/src/potinit.f90 @@ -152,8 +152,16 @@ SUBROUTINE potinit() ! IF (lda_plus_u) THEN ! - IF (lda_plus_u_kind == 0) THEN - CALL init_ns() + IF (lda_plus_u_kind == 0) THEN + ! --- LUCA --------------- + IF (noncolin) THEN + CALL init_ns_nc() + ELSE + ! --------------------- + CALL init_ns() + ! --- LUCA ------------- + ENDIF + ! ---------------------- ELSEIF (lda_plus_u_kind == 1) THEN IF (noncolin) THEN CALL init_ns_nc() @@ -273,7 +281,15 @@ SUBROUTINE potinit() WRITE( stdout, '(/5X,"STARTING HUBBARD OCCUPATIONS:")') ! IF (lda_plus_u_kind == 0) THEN - CALL write_ns() + ! ----- LUCA ------------ + IF (noncolin) THEN + CALL write_ns_nc() + ELSE + ! ---------------------- + CALL write_ns() + ! ----- LUCA ------------ + ENDIF + ! ---------------------- ELSEIF (lda_plus_u_kind == 1) THEN IF (noncolin) THEN CALL write_ns_nc() diff --git a/PW/src/stres_hub.f90 b/PW/src/stres_hub.f90 index 51d04e2fd..9e12ff576 100644 --- a/PW/src/stres_hub.f90 +++ b/PW/src/stres_hub.f90 @@ -36,8 +36,8 @@ SUBROUTINE stres_hub ( sigmah ) USE mp_pools, ONLY : inter_pool_comm, me_pool, nproc_pool USE mp, ONLY : mp_sum USE control_flags, ONLY : gamma_only - USE mp_bands, ONLY : use_bgrp_in_hpsi - USE noncollin_module, ONLY : noncolin + USE mp_bands, ONLY : use_bgrp_in_hpsi, intra_bgrp_comm + USE noncollin_module, ONLY : noncolin, npol USE force_mod, ONLY : eigenval, eigenvect, overlap_inv, at_dy, at_dj, & us_dy, us_dj USE wavefunctions_gpum, ONLY : using_evc @@ -51,9 +51,12 @@ SUBROUTINE stres_hub ( sigmah ) ! ! ... local variables ! - INTEGER :: ipol, jpol, na, nt, is, m1, m2, na1, nt1, na2, nt2, viz, ik, npw + INTEGER :: ipol, jpol, na, nt, is, is2, m1, m2, na1, nt1, na2, nt2, viz, ik, npw INTEGER :: ldim, ldim1, ldim2, ldimb, equiv_na2, nb_s, nb_e, mykey, i REAL(DP), ALLOCATABLE :: dns(:,:,:,:), dnsb(:,:,:,:) + ! ---------- LUCA ------------------------------------- + COMPLEX (DP), ALLOCATABLE :: dns_nc(:,:,:,:) + ! ----------------------------------------------------- REAL(DP) :: xyz(3,3) COMPLEX(DP), ALLOCATABLE :: dnsg(:,:,:,:,:) !! the derivative of the atomic occupations @@ -72,23 +75,39 @@ SUBROUTINE stres_hub ( sigmah ) ! IF (noncolin) CALL errore ("stres_hub","Noncollinear case is not supported",1) ! +<<<<<<< HEAD IF (ANY(Hubbard_J(:,:)>eps16)) CALL errore("stres_hub", & " stress in the DFT+U+J scheme is not implemented", 1 ) +======= + ! IF (noncolin) CALL errore ("forceh","Noncollinear case is not supported",1) +>>>>>>> 0c14430b9 (loaded PW implementation) ! sigmah(:,:) = 0.d0 ! - ALLOCATE (spsi(npwx,nbnd)) - ALLOCATE (wfcatom(npwx,natomwfc)) - ALLOCATE (at_dy(npwx,natomwfc), at_dj(npwx,natomwfc)) + ! -------------------- LUCA (added npol to the plane-wave sizes)----------------------- + ALLOCATE (spsi(npwx*npol,nbnd)) + ALLOCATE (wfcatom(npwx*npol,natomwfc)) + ALLOCATE (at_dy(npwx*npol,natomwfc), at_dj(npwx*npol,natomwfc)) IF (okvan) ALLOCATE (us_dy(npwx,nkb), us_dj(npwx,nkb)) +<<<<<<< HEAD IF (Hubbard_projectors.EQ."ortho-atomic") THEN ALLOCATE (swfcatom(npwx,natomwfc)) +======= + IF (U_projection.EQ."ortho-atomic") THEN + ALLOCATE (swfcatom(npwx*npol,natomwfc)) +>>>>>>> 0c14430b9 (loaded PW implementation) ALLOCATE (eigenval(natomwfc)) ALLOCATE (eigenvect(natomwfc,natomwfc)) ALLOCATE (overlap_inv(natomwfc,natomwfc)) ENDIF ! - CALL allocate_bec_type( nwfcU, nbnd, proj ) + ! ----------------------- LUCA ------------------- + IF (noncolin) THEN + ALLOCATE (proj%k (nwfcU, nbnd)) + ELSE + CALL allocate_bec_type( nwfcU, nbnd, proj ) + ENDIF + ! ------------------------------------------------ ! ! poor-man parallelization over bands ! - if nproc_pool=1 : nb_s=1, nb_e=nbnd, mykey=0 @@ -100,12 +119,19 @@ SUBROUTINE stres_hub ( sigmah ) ! IF (lda_plus_u_kind.EQ.0) THEN ldim = 2 * Hubbard_lmax + 1 - ALLOCATE (dns(ldim,ldim,nspin,nat)) + ! ---------------------- LUCA ----------------------- + IF (noncolin) then + ALLOCATE ( dns_nc(ldim, ldim, nspin, nat) ) + ELSE + ALLOCATE (dns(ldim,ldim,nspin,nat)) + ENDIF + ! -------------------------------------------------- lhubb = .FALSE. DO nt = 1, ntyp IF (is_hubbard_back(nt)) lhubb = .TRUE. ENDDO IF (lhubb) THEN + IF (noncolin) CALL errore ("stres_hub","Noncollinear and background are not supported",1) ldimb = ldmx_b ALLOCATE ( dnsb(ldimb,ldimb,nspin,nat) ) ENDIF @@ -159,7 +185,15 @@ SUBROUTINE stres_hub ( sigmah ) CALL orthoUwfc_k (ik, .TRUE.) ! ! proj= - CALL calbec ( npw, wfcU, spsi, proj) + ! ----------------------- LUCA ------------------------------------ + IF (noncolin) THEN + CALL ZGEMM ('C', 'N', nwfcU, nbnd, npwx*npol, (1.0_DP, 0.0_DP), wfcU, & + npwx*npol, spsi, npwx*npol, (0.0_DP, 0.0_DP), proj%k, nwfcU) + CALL mp_sum( proj%k( :, 1:nbnd ), intra_bgrp_comm ) + ELSE + CALL calbec ( npw, wfcU, spsi, proj) + ENDIF + ! ---------------------------------------------------------------- ! ! Compute derivatives of spherical harmonics and spherical Bessel functions ! @@ -194,25 +228,47 @@ SUBROUTINE stres_hub ( sigmah ) ! ! Compute the derivative of the occupation matrix w.r.t epsilon ! - IF (gamma_only) THEN - CALL dndepsilon_gamma(ipol,jpol,ldim,proj%r,spsi,ik,nb_s,nb_e,mykey,1,dns) - ELSE - CALL dndepsilon_k(ipol,jpol,ldim,proj%k,spsi,ik,nb_s,nb_e,mykey,1,dns) - ENDIF - ! - DO na = 1, nat - nt = ityp(na) - IF ( is_hubbard(nt) ) THEN - DO is = 1, nspin - DO m2 = 1, 2 * Hubbard_l(nt) + 1 - DO m1 = 1, 2 * Hubbard_l(nt) + 1 - sigmah(ipol,jpol) = sigmah(ipol,jpol) - & - v%ns(m2,m1,is,na) * dns(m1,m2,is,na) + ! ------------------- LUCA ------------------------------- + IF (noncolin) THEN + CALL dndepsilon_k_nc (ipol,jpol,ldim,proj%k,spsi,ik,nb_s,nb_e,mykey,1,dns_nc ) + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + DO is = 1, npol + DO is2 = 1, npol + DO m2 = 1, 2*Hubbard_l(nt)+1 + DO m1 = 1, 2*Hubbard_l(nt)+1 + sigmah(ipol,jpol) = sigmah(ipol,jpol) - & + dble( v%ns_nc(m2, m1, npol*(is-1)+is2, na) * & + dns_nc(m1, m2, npol*(is2-1)+is, na) ) + ENDDO + ENDDO ENDDO ENDDO - ENDDO - ENDIF - ENDDO + ENDIF + ENDDO + ELSE + IF (gamma_only) THEN + CALL dndepsilon_gamma(ipol,jpol,ldim,proj%r,spsi,ik,nb_s,nb_e,mykey,1,dns) + ELSE + CALL dndepsilon_k(ipol,jpol,ldim,proj%k,spsi,ik,nb_s,nb_e,mykey,1,dns) + ENDIF + ! + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + DO is = 1, nspin + DO m2 = 1, 2 * Hubbard_l(nt) + 1 + DO m1 = 1, 2 * Hubbard_l(nt) + 1 + sigmah(ipol,jpol) = sigmah(ipol,jpol) - & + v%ns(m2,m1,is,na) * dns(m1,m2,is,na) + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO + ENDIF + ! --------------------------------------------------------- ! ! The background part ! @@ -314,6 +370,8 @@ SUBROUTINE stres_hub ( sigmah ) ! CALL deallocate_bec_type ( proj ) IF (ALLOCATED(dns)) DEALLOCATE (dns) + ! ----------------- LUCA --------------------------- + IF (ALLOCATED(dns_nc)) DEALLOCATE(dns_nc) IF (ALLOCATED(dnsb)) DEALLOCATE (dnsb) IF (ALLOCATED(dnsg)) DEALLOCATE (dnsg) DEALLOCATE (spsi) @@ -494,6 +552,156 @@ SUBROUTINE dndepsilon_k ( ipol,jpol,ldim,proj,spsi,ik,nb_s,nb_e,mykey,lpuk,dns ) ! END SUBROUTINE dndepsilon_k ! +! ---------------------- LUCA ------------------------------------ +SUBROUTINE dndepsilon_k_nc ( ipol,jpol,ldim,proj,spsi,ik,nb_s,nb_e,mykey,lpuk,dns_nc ) + !----------------------------------------------------------------------------- + !! This routine computes the derivative of the ns_nc atomic occupations with + !! respect to the strain epsilon(ipol,jpol) used to obtain the Hubbard + !! contribution to the internal stres tensor in the noncollinear formalism. + ! + USE kinds, ONLY : DP + USE ions_base, ONLY : nat, ityp + USE klist, ONLY : ngk + USE lsda_mod, ONLY : nspin, current_spin + USE wvfct, ONLY : nbnd, npwx, wg + USE becmod, ONLY : bec_type, allocate_bec_type, deallocate_bec_type + USE noncollin_module, ONLY : npol, noncolin + USE mp_pools, ONLY : intra_pool_comm + USE mp, ONLY : mp_sum + USE ldaU, ONLY : nwfcU, offsetU, Hubbard_l, is_hubbard, & + ldim_back, offsetU_back, offsetU_back1, & + is_hubbard_back, Hubbard_l_back, backall + USE wavefunctions_gpum,ONLY : using_evc + USE becmod_subs_gpum, ONLY : using_becp_auto + + IMPLICIT NONE + ! + ! I/O variables + ! + INTEGER, INTENT(IN) :: ipol + !! component index 1 to 3 + INTEGER, INTENT(IN) :: jpol + !! component index 1 to 3 + INTEGER, INTENT(IN) :: ldim + !! number of m-values: ldim = 2*Hubbard_lmax+1 + COMPLEX (DP), INTENT(IN) :: proj(nwfcU,nbnd) + !! projection + COMPLEX (DP), INTENT(IN) :: spsi(npwx*npol,nbnd) + !! \(S|\ \text{evc}\rangle\) + INTEGER, INTENT(IN) :: ik + !! k-point index + INTEGER, INTENT(IN) :: nb_s + !! starting band number (for band parallelization) + INTEGER, INTENT(IN) :: nb_e + !! ending band number (for band parallelization) + INTEGER, INTENT(IN) :: mykey + !! If each band appears more than once + !! compute its contribution only once (i.e. when mykey=0) + INTEGER, INTENT(IN) :: lpuk + !! index to control the standard (lpuk=1) or + !! background (lpuk=2) contribution to the force + COMPLEX(DP), INTENT(OUT) :: dns_nc(ldim,ldim,nspin,nat) + !! the derivative of the ns atomic occupations + ! + ! ... local variables + ! + INTEGER :: ibnd, & ! count on bands + is, & ! count on spins + npw, & ! number of plane waves + na, & ! atomic index + nt, & ! index of the atomic type + m1, m2, m11, m22, & ! indices of magnetic quantum numbers + off1, off2, off22, & ! indices for the offsets + is1, is2, i, j, ldim1 + COMPLEX(DP), ALLOCATABLE :: dproj(:,:) + REAL(DP) :: psum + ! + ALLOCATE ( dproj(nwfcU,nbnd) ) + ! + CALL using_evc(0) + CALL using_becp_auto(2) + ! + ! D_Sl for l=1 and l=2 are already initialized, for l=0 D_S0 is 1 + ! + ! Offset of atomic wavefunctions initialized in setup and stored in offsetU + ! + dns_nc(:,:,:,:) = 0.d0 + ! + npw = ngk(ik) + ! + ! Calculate the first derivative of proj with respect to epsilon(ipol,jpol) + ! + CALL dprojdepsilon_k (spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj) + ! + ! Band parallelization. If each band appears more than once + ! compute its contribution only once (i.e. when mykey=0) + ! + IF ( mykey /= 0 ) GO TO 10 + ! + ! !omp parallel do default(shared) private(na,nt,m1,m2,ibnd) + DO na = 1, nat + nt = ityp(na) + IF ( is_hubbard(nt) ) THEN + ! + ldim1 = 2*Hubbard_l(nt)+1 + DO is1 = 1, npol + DO is2 = 1, npol + i = npol*(is1-1) + is2 + DO m1 = 1, ldim1 + DO m2 = 1, ldim1 + DO ibnd = nb_s, nb_e + dns_nc(m1,m2,i,na) = dns_nc(m1,m2,i,na) + & + wg(ibnd,ik) * dcmplx(CONJG(dproj(offsetU(na)+m2+ldim1*(is2-1),ibnd) )* & + proj(offsetU(na)+m1+ldim1*(is1-1),ibnd) + & + CONJG(proj(offsetU(na)+m2+ldim1*(is2-1),ibnd) )* & + dproj(offsetU(na)+m1+ldim1*(is1-1),ibnd)) + ENDDO + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO +! !omp end parallel do + ! +10 CALL mp_sum(dns_nc, intra_pool_comm) + ! + ! + ! Impose hermiticity of dns_{m1,m2} + ! + ! !omp parallel do default(shared) private(na,is,m1,m2) + DO na = 1, nat + nt = ityp (na) + IF ( is_hubbard(nt) ) THEN + DO is1 = 1, npol + DO is2 = 1, npol + i = npol*(is1-1) + is2 + j = is1 + npol*(is2-1) + ldim1 = 2*Hubbard_l(nt)+1 + DO m1 = 1, ldim1 + DO m2 = 1, ldim1 + psum = ABS( dns_nc(m1,m2,i,na) - CONJG(dns_nc(m2,m1,j,na)) ) + IF (psum.GT.1.d-10) THEN + ! print*, na, m1, m2, is1, is2 + ! print*, dns_nc(m1,m2,i,na) + ! print*, dns_nc(m2,m1,j,na) + CALL errore( 'dns_nc', 'non hermitean matrix', 1 ) + ELSE + dns_nc(m2,m1,j,na) = CONJG( dns_nc(m1,m2,i,na) ) + ENDIF + ENDDO + ENDDO + ENDDO + ENDDO + ENDIF + ENDDO + ! + DEALLOCATE( dproj ) + ! + RETURN + ! +END SUBROUTINE dndepsilon_k_nc +! !------------------------------------------------------------------------------------ SUBROUTINE dndepsilon_gamma ( ipol,jpol,ldim,proj,spsi,ik,nb_s,nb_e,mykey,lpuk,dns ) !--------------------------------------------------------------------------------- @@ -964,13 +1172,14 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) USE force_mod, ONLY : eigenval, eigenvect, overlap_inv, at_dy, at_dj USE mp_bands, ONLY : intra_bgrp_comm USE mp, ONLY : mp_sum + USE noncollin_module, ONLY : noncolin, npol USE wavefunctions_gpum, ONLY : using_evc - ! IMPLICIT NONE ! ! I/O variables ! - COMPLEX(DP), INTENT(IN) :: spsi(npwx,nbnd) + ! --------------------------- LUCA (added npol to the plane-wave size) ------------------ + COMPLEX(DP), INTENT(IN) :: spsi(npwx*npol,nbnd) !! S|evc> INTEGER, INTENT(IN) :: ik !! k-point index @@ -1015,8 +1224,9 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) ! At first the derivatives of the atomic wfcs: we compute the term ! ! + ! --------------------------- LUCA (added npol to the plane-wave size) ------------------ ALLOCATE ( qm1(npwx), gk(3,npwx) ) - ALLOCATE ( dwfc(npwx,nwfcU) ) + ALLOCATE ( dwfc(npwx*npol,nwfcU) ) dwfc(:,:) = (0.d0, 0.d0) ! ! 1. Derivative of the atomic wavefunctions @@ -1045,10 +1255,33 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) nt = ityp(na) ldim_std = 2*Hubbard_l(nt)+1 IF (is_hubbard(nt) .OR. is_hubbard_back(nt)) THEN - DO m1 = 1, ldim_u(nt) - IF (m1.LE.ldim_std) THEN + ! --------------------------- LUCA ----------------------- + IF (U_projection.EQ."atomic") THEN + DO m1 = 1, ldim_u(nt) + IF (m1.LE.ldim_std) THEN + offpmU = offsetU(na) + offpm = oatwfc(na) + ELSE + offpmU = offsetU_back(na) - ldim_std + offpm = oatwfc_back(na) - ldim_std + IF (backall(nt) .AND. m1.GT.ldim_std+2*Hubbard_l_back(nt)+1) THEN + offpmU = offsetU_back1(na) - ldim_std - 2*Hubbard_l_back(nt) - 1 + offpm = oatwfc_back1(na) - ldim_std - 2*Hubbard_l_back(nt) - 1 + ENDIF + ENDIF + dwfc(ig,offpmU+m1) = at_dy(ig,offpm+m1) * a1 + at_dj(ig,offpm+m1) * a2 + IF (noncolin) THEN + IF (m1>ldim_std) CALL errore("dprojdepsilon_k", & + " Stress with background and noncollinear is not supported",1) + dwfc(ig+npwx,offpmU+m1+ldim_std) = at_dy(ig+npwx,offpm+m1+ldim_std)*a1 & + + at_dj(ig+npwx,offpm+m1+ldim_std)*a2 + ENDIF + ENDDO + ELSEIF (U_projection.EQ."ortho-atomic") THEN + DO m1 = 1, ldim_std*npol offpmU = offsetU(na) offpm = oatwfc(na) +<<<<<<< HEAD ELSE offpmU = offsetU_back(na) - ldim_std offpm = oatwfc_back(na) - ldim_std @@ -1062,19 +1295,28 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) ELSEIF (Hubbard_projectors.EQ."ortho-atomic") THEN IF (m1>ldim_std) CALL errore("dprojdtau_k", & " Stress with background and ortho-atomic is not supported",1) +======= +>>>>>>> 0c14430b9 (loaded PW implementation) DO m2 = 1, natomwfc dwfc(ig,offpmU+m1) = dwfc(ig,offpmU+m1) + & - overlap_inv(offpm+m1,m2) * ( at_dy(ig,m2) * a1 + at_dj(ig,m2) * a2 ) - ENDDO - ENDIF - ENDDO + overlap_inv(offpm+m1,m2) * ( at_dy(ig,m2) * a1 + at_dj(ig,m2) * a2 ) + IF (noncolin) dwfc(ig+npwx,offpmU+m1) = dwfc(ig+npwx,offpmU+m1) + & + overlap_inv(offpm+m1,m2) * ( at_dy(ig+npwx,m2) * a1 + at_dj(ig+npwx,m2) * a2 ) + ENDDO + ENDDO + ENDIF + ! -------------------------------------------------------- ENDIF ENDDO ! ENDDO ! ! The diagonal term - IF (ipol.EQ.jpol) dwfc(1:npw,:) = dwfc(1:npw,:) - wfcU(1:npw,:)*0.5d0 + ! ----------------- LUCA ----------------------------- + IF (ipol.EQ.jpol) THEN + dwfc(1:npw,:) = dwfc(1:npw,:) - wfcU(1:npw,:)*0.5d0 + IF (noncolin) dwfc(1+npwx:npwx+npw,:) = dwfc(1+npwx:npwx+npw,:) - wfcU(1+npwx:npwx+npw,:)*0.5d0 + ENDIF ! ! 2. Contribution due to the derivative of (O^{-1/2})_JI which ! is multiplied by atomic wavefunctions (only for ortho-atomic case) @@ -1105,10 +1347,17 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) doverlap(m1,m2) = doverlap(m1,m2) & + CONJG((at_dy(ig,m1)*a1 + at_dj(ig,m1)*a2)) * swfcatom(ig,m2) & + CONJG(swfcatom(ig,m1)) * (at_dy(ig,m2)*a1 + at_dj(ig,m2)*a2) + IF (noncolin) doverlap(m1,m2) = doverlap(m1,m2) & + + CONJG((at_dy(ig+npwx,m1)*a1 + at_dj(ig+npwx,m1)*a2)) * swfcatom(ig+npwx,m2) & + + CONJG(swfcatom(ig+npwx,m1)) * (at_dy(ig+npwx,m2)*a1 + at_dj(ig+npwx,m2)*a2) IF (ipol.EQ.jpol) THEN doverlap(m1,m2) = doverlap(m1,m2) & + CONJG((-wfcatom(ig,m1)*0.5d0)) * swfcatom(ig,m2) & + CONJG(swfcatom(ig,m1)) * (-wfcatom(ig,m2)*0.5d0) + ! --------------- LUCA ------------------------ + IF (noncolin) doverlap(m1,m2) = doverlap(m1,m2) & + + CONJG((-wfcatom(ig+npwx,m1)*0.5d0)) * swfcatom(ig+npwx,m2) & + + CONJG(swfcatom(ig+npwx,m1)) * (-wfcatom(ig+npwx,m2)*0.5d0) ENDIF ENDDO ENDDO @@ -1123,7 +1372,7 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) ! Calculate doverlap_us = < phi_I | dS/d\epsilon(ipol,jpol) | phi_J > ALLOCATE (doverlap_us(natomwfc,natomwfc)) CALL matrix_element_of_dSdepsilon (ik, ipol, jpol, & - natomwfc, wfcatom, natomwfc, wfcatom, doverlap_us, 1, natomwfc, 0) + natomwfc, wfcatom, natomwfc, wfcatom, doverlap_us, 1, natomwfc, 0, .false.) ! Sum up the "normal" and ultrasoft terms DO m1 = 1, natomwfc DO m2 = 1, natomwfc @@ -1152,9 +1401,9 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) IF (is_hubbard(nt) .OR. is_hubbard_back(nt)) THEN offpmU = offsetU(na) offpm = oatwfc(na) - CALL ZGEMM('N','N', npw, ldim_u(nt), natomwfc, (1.d0,0.d0), & - wfcatom, npwx, doverlap_inv(:,offpm+1:offpm+ldim_u(nt)), & - natomwfc, (1.d0,0.d0), dwfc(:,offpmU+1:offpmU+ldim_u(nt)), npwx) + CALL ZGEMM('N','N', npwx*npol, ldim_u(nt)*npol, natomwfc, (1.d0,0.d0), & + wfcatom, npwx*npol, doverlap_inv(:,offpm+1:offpm+ldim_u(nt)*npol), & + natomwfc, (1.d0,0.d0), dwfc(:,offpmU+1:offpmU+ldim_u(nt)*npol), npwx*npol) ENDIF ENDDO ! @@ -1164,7 +1413,15 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) ENDIF ! ! Compute dproj = = - CALL calbec ( npw, dwfc, spsi, dproj ) + ! ---------------- LUCA ---------------------- + IF (noncolin) THEN + CALL ZGEMM('C','N', nwfcU, nbnd, npwx*npol, (1.d0,0.d0), & + dwfc, npwx*npol, spsi, npwx*npol, (0.d0,0.d0), & + dproj, nwfcU) + CALL mp_sum( dproj, intra_bgrp_comm ) + ELSE + CALL calbec ( npw, dwfc, spsi, dproj ) + ENDIF ! DEALLOCATE ( dwfc, qm1, gk) ! @@ -1174,7 +1431,7 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) IF (okvan) THEN ALLOCATE(dproj_us(nwfcU,nb_s:nb_e)) CALL matrix_element_of_dSdepsilon (ik, ipol, jpol, & - nwfcU, wfcU, nbnd, evc, dproj_us, nb_s, nb_e, mykey) + nwfcU, wfcU, nbnd, evc, dproj_us, nb_s, nb_e, mykey, .true.) ! dproj + dproj_us DO m1 = 1, nwfcU dproj(m1,nb_s:nb_e) = dproj(m1,nb_s:nb_e) + dproj_us(m1,:) @@ -1188,7 +1445,7 @@ SUBROUTINE dprojdepsilon_k ( spsi, ik, ipol, jpol, nb_s, nb_e, mykey, dproj ) ! END SUBROUTINE dprojdepsilon_k ! -SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, lB_s, lB_e, mykey) +SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, lB_s, lB_e, mykey, flag) ! ! This routine computes the matrix element < A | dS/d\epsilon(ipol,jpol) | B > ! Written by I. Timrov (2020) @@ -1200,12 +1457,16 @@ SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, l USE cell_base, ONLY : tpiba USE gvect, ONLY : g USE wvfct, ONLY : npwx, wg - USE uspp, ONLY : nkb, vkb, qq_at, okvan - USE uspp_param, ONLY : nh + USE uspp, ONLY : nkb, vkb, qq_at, qq_so, okvan + USE uspp_param, ONLY : nh, upf USE wavefunctions, ONLY : evc USE becmod, ONLY : calbec USE klist, ONLY : xk, igk_k, ngk USE force_mod, ONLY : us_dy, us_dj + USE mp_bands, ONLY : intra_bgrp_comm + USE mp, ONLY : mp_sum + USE noncollin_module, ONLY : noncolin, npol + USE ldaU, ONLY : offsetU, Hubbard_l, is_hubbard ! IMPLICIT NONE ! @@ -1216,14 +1477,18 @@ SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, l INTEGER, INTENT(IN) :: lA, lB, lB_s, lB_e ! There is a possibility to parallelize over lB, ! where lB_s (start) and lB_e (end) - COMPLEX(DP), INTENT(IN) :: A(npwx,lA) - COMPLEX(DP), INTENT(IN) :: B(npwx,lB) + ! ------------------ LUCA -------------------------------- + LOGICAL, INTENT(IN) :: flag ! noncollinear: controlling whether + ! calculating + ! or (= .true.) + COMPLEX(DP), INTENT(IN) :: A(npwx*npol,lA) + COMPLEX(DP), INTENT(IN) :: B(npwx*npol,lB) COMPLEX(DP), INTENT(OUT) :: A_dS_B(lA,lB_s:lB_e) INTEGER, INTENT(IN) :: mykey ! ! Local variables ! - INTEGER :: npw, i, nt, na, ih, jh, ig, iA, iB, ijkb0 + INTEGER :: npw, i, nt, na, nb, ih, jh, ig, iA, iB, ijkb0, mU, mD, ldim, nt1 REAL(DP) :: gvec COMPLEX (DP), ALLOCATABLE :: Adbeta(:,:), Abeta(:,:), & dbetaB(:,:), betaB(:,:), & @@ -1257,20 +1522,38 @@ SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, l ! DO nt = 1, ntyp ! - ALLOCATE ( Adbeta(lA,nh(nt)) ) - ALLOCATE ( Abeta(lA,nh(nt)) ) - ALLOCATE ( dbetaB(nh(nt),lB) ) - ALLOCATE ( betaB(nh(nt),lB) ) - ALLOCATE ( qq(nh(nt),nh(nt)) ) + ! ------------------- LUCA (added npol) ------------------------------- + ALLOCATE ( Adbeta(lA,npol*nh(nt)) ) + ALLOCATE ( Abeta(lA,npol*nh(nt)) ) + ALLOCATE ( dbetaB(npol*nh(nt),lB) ) + ALLOCATE ( betaB(npol*nh(nt),lB) ) + ALLOCATE ( qq(npol*nh(nt),npol*nh(nt)) ) ! DO na = 1, nat ! IF ( ityp(na).EQ.nt ) THEN ! - qq(:,:) = CMPLX(qq_at(1:nh(nt),1:nh(nt),na), 0.0d0, kind=DP) + ! ----------- LUCA ---------------------------- + IF (noncolin) THEN + IF ( upf(nt)%has_so ) THEN + qq(1:nh(nt),1:nh(nt)) = qq_so(:,:,1,nt) + qq(1:nh(nt),1+nh(nt):nh(nt)*npol) = qq_so(:,:,2,nt) + qq(1+nh(nt):nh(nt)*npol,1:nh(nt)) = qq_so(:,:,3,nt) + qq(1+nh(nt):nh(nt)*npol,1+nh(nt):nh(nt)*npol) = qq_so(:,:,4,nt) + ELSE + qq(1:nh(nt),1:nh(nt)) = qq_at(1:nh(nt),1:nh(nt),na) + qq(1:nh(nt),1+nh(nt):nh(nt)*npol) = (0.0,0.0) + qq(1+nh(nt):nh(nt)*npol,1:nh(nt)) = (0.0,0.0) + qq(1+nh(nt):nh(nt)*npol,1+nh(nt):nh(nt)*npol) = qq_at(1:nh(nt),1:nh(nt),na) + ENDIF + ELSE + qq(:,:) = CMPLX(qq_at(1:nh(nt),1:nh(nt),na), 0.0d0, kind=DP) + ENDIF ! ! aux is used as a workspace - ALLOCATE ( aux(npwx,nh(nt)) ) + ! ------------ LUCA (added npol) ------------------------- + ALLOCATE ( aux(npwx*npol,nh(nt)*npol) ) + aux=(0.0,0.0) ! DO ih = 1, nh(nt) ! now we compute the true dbeta function @@ -1282,46 +1565,131 @@ SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, l ! - (k+G)_ipol * (k+G)_jpol / |k+G| a2 = -gk(ipol,ig)*gk(jpol,ig)*qm1(ig) ! + ! ----------------------- LUCA --------------------- aux(ig,ih) = us_dy(ig,ijkb0+ih) * a1 + us_dj(ig,ijkb0+ih) * a2 + IF (noncolin) aux(ig+npwx,ih+nh(nt)) = us_dy(ig,ijkb0+ih) * a1 & + + us_dj(ig,ijkb0+ih) * a2 ! - IF (ipol.EQ.jpol) aux(ig,ih) = aux(ig,ih) - vkb(ig,ijkb0+ih)*0.5d0 + IF (ipol.EQ.jpol) THEN + aux(ig,ih) = aux(ig,ih) - vkb(ig,ijkb0+ih)*0.5d0 + IF (noncolin) aux(ig+npwx,ih+nh(nt)) = aux(ig+npwx,ih+nh(nt)) & + - vkb(ig,ijkb0+ih)*0.5d0 + ENDIF ! + ! ---------------------------------------------------- ENDDO ENDDO - ! - ! Calculate dbetaB = - CALL calbec(npw, aux, B, dbetaB ) - ! - ! Calculate Adbeta = - CALL calbec(npw, A, aux, Adbeta ) + ! ------------------------------- LUCA --------------------------- + IF (noncolin) THEN + ! Calculate betaB = + ! dbetaB(:,1 : nh(nt)) = spin up + ! dbetaB(:,1+nh(nt): nh(nt)*npol) = spin down + dbetaB=(0.0,0.0) + CALL ZGEMM ('C', 'N', nh(nt)*npol, lB, npwx*npol, (1.0_DP, 0.0_DP), aux, & + npwx*npol, B, npwx*npol, (0.0_DP, 0.0_DP), dbetaB, nh(nt)*npol) + CALL mp_sum( dbetaB(:, 1:lB) , intra_bgrp_comm ) + ! + ! Calculate Adbeta = + ! Adbeta(:,1 : nh(nt)) = spin up + ! Adbeta(:,1+nh(nt): nh(nt)*npol) = spin down + Adbeta=(0.0,0.0) + CALL ZGEMM ('C', 'N', lA, nh(nt)*npol, npwx*npol, (1.0_DP, 0.0_DP), A, & + npwx*npol, aux, npwx*npol, (0.0_DP, 0.0_DP), Adbeta, lA) + CALL mp_sum( Adbeta(:, 1:nh(nt)*npol) , intra_bgrp_comm ) + ELSE + ! Calculate dbetaB = + CALL calbec(npw, aux, B, dbetaB ) + ! + ! Calculate Adbeta = + CALL calbec(npw, A, aux, Adbeta ) + ENDIF ! ! aux is now used as a work space to store vkb DO ih = 1, nh(nt) DO ig = 1, npw aux(ig,ih) = vkb(ig,ijkb0+ih) + ! -------------------------- LUCA --------------------------- + IF (noncolin) aux(ig+npwx,ih+nh(nt)) = vkb(ig,ijkb0+ih) + ! ------------------------------------------------------------------- ENDDO ENDDO ! - ! Calculate Abeta = - CALL calbec(npw, A, aux, Abeta ) - ! - ! Calculate betaB = - CALL calbec( npw, aux, B, betaB ) + ! --------------------- LUCA --------------------------------- + IF (noncolin) THEN + ! Calculate Abeta = + ! (same as Adbeta) + ! + Abeta=(0.0,0.0) + CALL ZGEMM ('C', 'N', lA, nh(nt)*npol, npwx*npol, (1.0_DP, 0.0_DP), A, & + npwx*npol, aux, npwx*npol, (0.0_DP, 0.0_DP), Abeta, lA) + CALL mp_sum( Abeta(:, 1:nh(nt)*npol) , intra_bgrp_comm ) + ! + ! Calculate betaB = + ! (same as dbetaB) + ! + betaB=(0.0,0.0) + CALL ZGEMM ('C', 'N', nh(nt)*npol, lB, npwx*npol, (1.0_DP, 0.0_DP), aux, & + npwx*npol, B, npwx*npol, (0.0_DP, 0.0_DP), betaB, nh(nt)*npol) + CALL mp_sum( betaB(:, 1:lB) , intra_bgrp_comm ) + ELSE + ! Calculate Adbeta = + CALL calbec ( npw, A, aux, Abeta ) + ! + ! Calculate dbetaB = + CALL calbec ( npw, aux, B, betaB ) + ENDIF + ! ------------------------------------------------------------ ! DEALLOCATE ( aux ) ! - ALLOCATE ( aux(nh(nt), lB) ) - ! - ! Calculate \sum_jh qq(ih,jh) * dbetaB(jh) - CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - qq, nh(nt), dbetaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & - aux(1,lB_s), nh(nt)) + ALLOCATE ( aux(nh(nt)*npol, lB) ) + aux(:,:)=(0.0,0.0) + ! + ! ----------------------- LUCA ---------------------------- + IF (noncolin) THEN + ! aux(:, 1:nh(nt)) = \sum_jh qq(1,jh)*dbetaB(1,jh) + qq(2,jh)*dbetaB(2,jh) + ! aux(:, 1+nh(nt):nh(nt)*npol) = \sum_jh qq(3,jh)*dbetaB(1,jh) + qq(4,jh)*dbetaB(2,jh) + ! + ! spin up + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1, 1), nh(nt)*npol, & + dbetaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1, lB_s), nh(nt)*npol) + ! spin down + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1+nh(nt), 1), nh(nt)*npol, & + dbetaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1+nh(nt), lB_s), nh(nt)*npol) + ELSE + ! Calculate \sum_jh qq_at(ih,jh) * dbetaB(jh) + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + qq, nh(nt), dbetaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & + aux(1,lB_s), nh(nt)) + ENDIF + ! ---------------------------------------------------------- dbetaB(:,:) = aux(:,:) ! - ! Calculate \sum_jh qq(ih,jh) * betaB(jh) - CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - qq, nh(nt), betaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & - aux(1,lB_s), nh(nt)) + ! ----------------------- LUCA ---------------------------- + IF (noncolin) THEN + ! (same as dbetaB) + ! + ! spin up + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1, 1), nh(nt)*npol, & + betaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1, lB_s), nh(nt)*npol) + ! spin down + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + qq(1+nh(nt), 1), nh(nt)*npol, & + betaB(1, lB_s), nh(nt)*npol, (0.0d0,0.0d0), & + aux(1+nh(nt), lB_s), nh(nt)*npol) + ELSE + ! Calculate \sum_jh qq_at(ih,jh) * betaB(jh) + CALL ZGEMM('N', 'N', nh(nt), lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + qq, nh(nt), betaB(1,lB_s), nh(nt), (0.0d0,0.0d0), & + aux(1,lB_s), nh(nt)) + ENDIF + ! ---------------------------------------------------------- betaB(:,:) = aux(:,:) ! DEALLOCATE ( aux ) @@ -1333,13 +1701,54 @@ SUBROUTINE matrix_element_of_dSdepsilon (ik, ipol, jpol, lA, A, lB, B, A_dS_B, l ! Only A_dS_B(:,lB_s:lB_e) are calculated ! IF ( mykey == 0 ) THEN - CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - Adbeta, lA, betaB(1,lB_s), nh(nt), (1.0d0,0.0d0), & - A_dS_B(1,lB_s), lA) - CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & - Abeta, lA, dbetaB(1,lB_s), nh(nt), (1.0d0,0.0d0), & - A_dS_B(1,lB_s), lA) - ! + ! ----------------------- LUCA ---------------------------- + IF (noncolin) THEN + nt1 = nh(nt) + 1 + IF ( flag ) THEN + DO nb = 1, nat + IF ( is_hubbard(ityp(nb)) ) THEN + ldim = 2*hubbard_l(ityp(nb)) + 1 + mU = offsetU(nb) + 1 + mD = mU + ldim + ! + ! spin up + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Adbeta(mU,1), lA, & + betaB(1, lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(mU, lB_s), lA) + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Abeta(mU,1), lA, & + dbetaB(1, lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(mU, lB_s), lA) + ! spin down + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Adbeta(mD, nt1), lA, & + betaB(nt1, lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(mD,lB_s), lA) + CALL ZGEMM('N', 'N', ldim, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Abeta(mD, nt1), lA, & + dbetaB(nt1, lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(mD,lB_s), lA) + ENDIF + ENDDO + ELSEIF ( .not. flag ) THEN + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + Adbeta, lA, betaB(1,lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt)*npol, (1.0d0,0.0d0), & + Abeta, lA, dbetaB(1,lB_s), nh(nt)*npol, (1.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + ENDIF + ELSE + + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Adbeta, lA, betaB(1,lB_s), nh(nt), (1.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + CALL ZGEMM('N', 'N', lA, lB_e-lB_s+1, nh(nt), (1.0d0,0.0d0), & + Abeta, lA, dbetaB(1,lB_s), nh(nt), (1.0d0,0.0d0), & + A_dS_B(1,lB_s), lA) + ENDIF + ! ---------------------------------------------------------- ENDIF ENDIF ! diff --git a/PW/src/sum_band.f90 b/PW/src/sum_band.f90 index 4ba20f70a..9c31f59f4 100644 --- a/PW/src/sum_band.f90 +++ b/PW/src/sum_band.f90 @@ -32,7 +32,8 @@ SUBROUTINE sum_band() becsum_d, ebecsum_d USE uspp_param, ONLY : nh, nhm USE wavefunctions, ONLY : evc, psic, psic_nc - USE noncollin_module, ONLY : noncolin, npol, nspin_mag, domag + USE noncollin_module, ONLY : noncolin, npol, nspin_mag, domag, lspinorb + USE upf_spinorb, ONLY : fcoef USE wvfct, ONLY : nbnd, npwx, wg, et, btype USE mp_pools, ONLY : inter_pool_comm USE mp_bands, ONLY : inter_bgrp_comm, intra_bgrp_comm, nbgrp @@ -108,7 +109,15 @@ SUBROUTINE sum_band() IF (lda_plus_u) THEN IF (lda_plus_u_kind.EQ.0) THEN ! - CALL new_ns(rho%ns) + ! --- LUCA ----- + IF (noncolin) THEN + CALL new_ns_nc(rho%ns_nc) + ELSE + ! --------------- + CALL new_ns(rho%ns) + ! ----LUCA ------------ + ENDIF + ! ----------------- ! DO nt = 1, ntyp IF (is_hubbard_back(nt)) CALL new_nsb(rho%nsb) diff --git a/PW/src/v_of_rho.f90 b/PW/src/v_of_rho.f90 index 93de34de3..9ce11bdd1 100644 --- a/PW/src/v_of_rho.f90 +++ b/PW/src/v_of_rho.f90 @@ -85,7 +85,15 @@ SUBROUTINE v_of_rho( rho, rho_core, rhog_core, & ! ! DFT+U (simplified) ! - CALL v_hubbard (rho%ns, v%ns, eth) + ! ---------------- LUCA ------------ + IF (noncolin) THEN + CALL v_hubbard_nc (rho%ns_nc, v%ns_nc, eth) + ELSE + ! ---------------------------- + CALL v_hubbard (rho%ns, v%ns, eth) + ! ---------------- LUCA ------------ + ENDIF + ! ------------------------------- ! ! Background IF (ldmx_b.GT.0) THEN @@ -857,6 +865,108 @@ SUBROUTINE v_hubbard( ns, v_hub, eth ) END SUBROUTINE v_hubbard !----------------------------------------------------------------------- +!--- LUCA +!---------------------------------------------------------------------- +SUBROUTINE v_hubbard_nc( ns, v_hub, eth ) + !--------------------------------------------------------------------- + ! + !! Computes Hubbard potential and Hubbard energy in the \textbf{noncollinear formulation}. + !! DFT+U: Simplified rotationally-invariant formulation by + !! Dudarev et al., Phys. Rev. B 57, 1505 (1998). + !! DFT+U+J0: B. Himmetoglu et al., Phys. Rev. B 84, 115108 (2011). + ! + USE kinds, ONLY : DP + USE ions_base, ONLY : nat, ityp + USE ldaU, ONLY : Hubbard_lmax, Hubbard_l, Hubbard_U, & + Hubbard_alpha, Hubbard_J0, Hubbard_beta + USE lsda_mod, ONLY : nspin + USE control_flags, ONLY : iverbosity, dfpt_hub + USE io_global, ONLY : stdout + ! + IMPLICIT NONE + ! + COMPLEX(DP) :: ns(2*Hubbard_lmax+1,2*Hubbard_lmax+1,nspin,nat) + !! Occupation matrix + COMPLEX(DP) :: v_hub(2*Hubbard_lmax+1,2*Hubbard_lmax+1,nspin,nat) + !! Hubbard potential + REAL(DP), INTENT(OUT) :: eth + !! Hubbard energy + ! + ! ... local variables + ! + INTEGER :: is, is1, na, nt, m1, m2 + ! + eth = 0.d0 + v_hub(:,:,:,:) = 0.d0 + ! + DO na = 1, nat + ! + nt = ityp (na) + ! + IF (Hubbard_U(nt) /= 0.d0) THEN + DO is = 1, nspin + ! + IF (is == 2) THEN + is1 = 3 + ELSEIF (is == 3) THEN + is1 = 2 + ELSE + is1 = is + ENDIF + ! + ! Non spin-flip contribution + ! (diagonal [spin indexes] occupancy matrices) + IF (is1 == is) THEN + ! + ! diagonal part [spin indexes] + DO m1 = 1, 2*Hubbard_l(nt) + 1 + ! Hubbard energy + eth = eth + ( Hubbard_alpha(nt) + 0.5D0*Hubbard_U(nt) )& + * ns(m1,m1,is,na) + ! Hubbard potential + v_hub(m1,m1,is,na) = v_hub(m1,m1,is,na) + & + Hubbard_alpha(nt) + 0.5D0*Hubbard_U(nt) + ! + ! NON-diagonal part [spin indexes] + DO m2 = 1, 2 * Hubbard_l(nt) + 1 + ! Hubbard energy + eth = eth - 0.5D0 * Hubbard_U(nt) * ns(m1,m2,is,na)*ns(m2,m1,is,na) + ! Hubbard potential + v_hub(m1,m2,is,na) = v_hub(m1,m2,is,na) - & + Hubbard_U(nt) * ns(m2,m1,is,na) + ENDDO + ENDDO + ! + ! Spin-flip contribution + ! (NON-diagonal [spin indexes] occupancy matrices) + ELSE + DO m1 = 1, 2*Hubbard_l(nt) + 1 + DO m2 = 1, 2 * Hubbard_l(nt) + 1 + ! Hubbard energy + eth = eth - 0.5D0 * Hubbard_U(nt) & + * ns(m1,m2,is,na)*ns(m2,m1,is1,na) + ! Hubbard potential + v_hub(m1,m2,is,na) = v_hub(m1,m2,is,na) - & + Hubbard_U(nt) * ns(m2,m1,is1,na) + ENDDO + ENDDO + ENDIF + ENDDO + ENDIF + !--------------------------------------- + ! + ENDDO + ! + ! Hubbard energy + ! + IF ( iverbosity > 0 ) THEN + WRITE(stdout,'(/5x,"HUBBARD ENERGY = ",f9.4,1x," (Ry)")') eth + ENDIF + ! + RETURN + ! +END SUBROUTINE v_hubbard_nc +!--------------------------------------------------------------------------- !--------------------------------------------------------------------------- SUBROUTINE v_hubbard_b (ns, v_hub, eth) !-------------------------------------------------------------------------