Some optimization (A. Smogunov)

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@3449 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
smogunov 2006-10-11 09:31:53 +00:00
parent a806c4703e
commit aad16258e6
3 changed files with 265 additions and 501 deletions

View File

@ -32,7 +32,6 @@ local.o \
local_set.o \ local_set.o \
poten.o \ poten.o \
print_clock_pwcond.o \ print_clock_pwcond.o \
rotate.o \
rotproc.o \ rotproc.o \
save_cond.o \ save_cond.o \
scatter_back.o \ scatter_back.o \

View File

@ -1,172 +0,0 @@
!
! Copyright (C) 2003 A. Smogunov
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
!
subroutine rotatef(app, bpp, bf, anlp, bnlp, bnlf, intw1, intw2, &
n2d, norbf, norbnow, npol)
!
! This subroutine makes a linear combination of the solutions
! in such a way that bpp at this slab becomes a delta symbol.
! It works for forward iterative process.
!
#include "f_defs.h"
USE kinds, only : DP
implicit none
integer :: norbf, n2d, norbnow, lam, n, n1, iorb, iorb1, npol, info
integer, allocatable :: ipiv(:)
complex(DP) :: &
app(n2d, n2d), & ! coeff. of local functions
bpp(n2d, n2d), & ! --
bf(n2d, n2d), & ! --
anlp(n2d, norbnow*npol), & ! coeff. of nonloc. functions
bnlp(n2d, norbnow*npol), & ! --
bnlf(n2d, norbnow*npol), & ! --
intw1(norbf*npol, 2*n2d), & ! integral of loc. fun.
intw2(norbf*npol, norbf*npol) ! integral of nonloc. fun.
complex(DP), allocatable :: h(:,:), aux(:,:)
complex(DP), parameter :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
call start_clock('rotatef')
allocate( h( n2d, n2d ) )
allocate( ipiv( n2d ) )
!
! To find the needed matrix h of the linear transformation
!
h=(0.d0,0.d0)
do lam=1, n2d
h(lam,lam)=(1.d0,0.d0)
enddo
call ZGESV(n2d,n2d,bpp,n2d,ipiv,h,n2d,info)
!
! To rotate app, bf, bpp
!
allocate( aux( n2d, n2d ) )
call ZGEMM('n','n',n2d,n2d,n2d,one,app,n2d,h,n2d,zero,aux,n2d)
app=aux
call ZGEMM('n','n',n2d,n2d,n2d,one,bf,n2d,h,n2d,zero,aux,n2d)
bf=aux
bpp=(0.d0,0.d0)
do lam=1, n2d
bpp(lam,lam)=(1.d0,0.d0)
enddo
deallocate(aux)
!
! To recalculate intw1 with new functions
!
if (norbnow==0) goto 100
allocate( aux( norbf*npol, n2d ) )
call ZGEMM('n','n',norbnow*npol,n2d,n2d,one,intw1,norbf*npol,h,n2d,zero,&
aux,norbf*npol)
do iorb=1,norbnow*npol
do n=1,n2d
intw1(iorb,n)=aux(iorb,n)
enddo
enddo
deallocate(aux)
!
! To reobtain nonlinear functions and the integrals
! intw2 on them.
!
call ZGEMM('n','n',n2d,norbnow*npol,n2d,-one,app,n2d,bnlp,n2d,one,&
anlp,n2d)
call ZGEMM('n','n',n2d,norbnow*npol,n2d,-one,bf,n2d,bnlp,n2d,one,&
bnlf,n2d)
call ZGEMM('n','n',norbnow*npol,norbnow*npol,n2d,-one,intw1,norbf*npol, &
bnlp,n2d,one,intw2,norbf*npol)
bnlp=(0.d0,0.d0)
100 continue
deallocate(h)
deallocate(ipiv)
call stop_clock('rotatef')
return
end subroutine rotatef
!------------------------------------------
subroutine rotateb (app, bpp, af, intw1, n2d, norbf, norbnow, npol)
!
! This subroutine makes a linear combination of the solutions
! in such a way that app at this slab becomes a delta symbol.
! It works for backward iterative process.
!
#include "f_defs.h"
USE kinds, only : DP
implicit none
integer :: norbf, n2d, norbnow, lam, n, n1, iorb, npol, info
integer, allocatable :: ipiv(:)
complex(DP) :: app(n2d,n2d), af(n2d,n2d), bpp(n2d,n2d), &
intw1(norbf*npol,2*n2d)
complex(DP), allocatable :: h(:,:), aux(:,:), aux1(:,:)
complex(DP), parameter :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
call start_clock('rotateb')
allocate( h( n2d, n2d ) )
allocate( ipiv( n2d ) )
!
! To find the needed matrix h of the linear transformation
!
h=(0.d0,0.d0)
do lam=1, n2d
h(lam,lam)=(1.d0,0.d0)
enddo
call ZGESV(n2d, n2d, app, n2d, ipiv, h, n2d, info)
!
! To rotate app, bpp, af
!
allocate( aux( n2d, n2d ) )
call ZGEMM('n','n',n2d,n2d,n2d,one,bpp,n2d,h,n2d,zero,aux,n2d)
bpp=aux
call ZGEMM('n','n',n2d,n2d,n2d,one,af,n2d,h,n2d,zero,aux,n2d)
af=aux
app=(0.d0,0.d0)
do lam=1, n2d
app(lam,lam)=(1.d0,0.d0)
enddo
deallocate(aux)
!
! To recalculate intw1 with new functions
!
if (norbnow==0) goto 100
allocate( aux( norbf*npol, n2d ) )
allocate( aux1( norbf*npol, n2d ) )
do iorb=1,norbnow*npol
do n=1,n2d
aux1(iorb,n)= intw1(iorb,n2d+n)
enddo
enddo
call ZGEMM('n','n',norbnow*npol,n2d,n2d,one,aux1,norbf*npol, &
h,n2d,zero,aux,norbf*npol)
do iorb=1,norbnow*npol
do n=1,n2d
intw1(iorb,n2d+n)= aux(iorb,n)
enddo
enddo
deallocate(aux)
deallocate(aux1)
100 continue
deallocate(h)
deallocate(ipiv)
call stop_clock('rotateb')
return
end subroutine rotateb

View File

@ -7,6 +7,7 @@
! !
! Optimized Aug. 2004 (ADC) ! Optimized Aug. 2004 (ADC)
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC). ! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
! Optimized Oct. 2006 (A. Smogunov)
! !
#include "f_defs.h" #include "f_defs.h"
! !
@ -34,8 +35,8 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
tblm(4,norb), & tblm(4,norb), &
k, kz, n, lam, ig, lam1, mdim, itt, nbb, iorb, iorb1, & k, kz, n, lam, ig, lam1, mdim, itt, nbb, iorb, iorb1, &
iorba, iorb1a, is, kp, nok, k1, nt, nb, kin, kfin iorba, iorb1a, is, kp, nok, k1, nt, nb, kin, kfin
INTEGER :: info INTEGER :: i, j, info
INTEGER, ALLOCATABLE :: inslab(:) INTEGER, ALLOCATABLE :: ipiv(:), inslab(:)
real(DP) :: z(nrz+1), r(1:ndmx,npsx), rab(1:ndmx,npsx), & real(DP) :: z(nrz+1), r(1:ndmx,npsx), rab(1:ndmx,npsx), &
betar(1:ndmx,nbrx,npsx), taunew(4,norb) betar(1:ndmx,nbrx,npsx), taunew(4,norb)
REAL(DP), PARAMETER :: eps=1.d-8 REAL(DP), PARAMETER :: eps=1.d-8
@ -47,23 +48,22 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
zk(n2d,nrzp) zk(n2d,nrzp)
COMPLEX(DP), ALLOCATABLE :: & COMPLEX(DP), ALLOCATABLE :: &
psigper(:,:), & ! psigper(g,lam)=newbg(g,lam1) psiper(lam1,lam) psigper(:,:), & ! psigper(g,lam)=newbg(g,lam1) psiper(lam1,lam)
w0(:,:,:), & ! w0(z,g,m) are 2D Fourier components (see four.f) w0(:,:,:), & ! w0(z,g,m) are 2D Fourier components (see four.f)
w(:,:,:), & ! w(z,lam,m)=psigper(g,lam)^* \exp{-igr^m_perp} w(:,:,:), & ! w(z,lam,m)=psigper(g,lam)^* \exp{-igr^m_perp}
! w0(z,g,m) ! w0(z,g,m)
ci(:,:,:), & ! ci(m,lam,k)=\int_{z(k)}^{z(k+1)} dz hmat(:,:), & ! ci(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
! w(z,lam,m)^*\exp{izk(lam,k)(z-z(k))} ! w(z,lam,m)^*\exp{izk(lam,k)(z-z(k))}
di(:,:,:), & ! di(m,lam,k)=\int_{z(k)}^{z(k+1)} dz amat(:,:), & ! di(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
! w(z,lam,m)^*\exp{izk(lam,k)(z(k+1)-z)} ! w(z,lam,m)^*\exp{izk(lam,k)(z(k+1)-z)}
cix(:,:,:), & ! xmat(:,:), & !
dix(:,:,:), & ! ci(:,:), & ! ci(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
bf(:,:), an(:,:), bn(:,:), & ! w(z,lam,m)^*\exp{izk(lam,k)(z-z(k))}
app(:,:), bpp(:,:), al(:,:), & di(:,:), & ! di(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
bl(:,:), af(:,:), & ! w(z,lam,m)^*\exp{izk(lam,k)(z(k+1)-z)}
bnlf(:,:), anln(:,:), bnln(:,:), & cix(:,:,:), & !
anlp(:,:), bnlp(:,:), anll(:,:), & dix(:,:,:), & !
ff(:,:), fl(:,:), ezk(:,:), emzk(:,:), zk2(:,:), s1m(:,:), s2m(:,:), & f0(:,:), f1(:,:), f2(:,:), f_aux(:,:), &
s3m(:,:), s4m(:,:), s5m(:,:), s6m(:,:), s7m(:,:), s8m(:,:), & zkk(:), ezk(:), emzk(:), zk2(:), ezk1(:,:), emzk1(:,:)
ezk1(:,:), emzk1(:,:)
CALL start_clock('scatter_forw') CALL start_clock('scatter_forw')
@ -77,40 +77,34 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
! !
call divide(nrz,kin,kfin) call divide(nrz,kin,kfin)
!------------------------
! Start of 2D Fourier components calculations and depending
! variables
!
ALLOCATE( psigper( ngper*npol, n2d ) ) ALLOCATE( psigper( ngper*npol, n2d ) )
ALLOCATE( w( nz1, n2d, norb*npol ) ) ALLOCATE( w( nz1, n2d, norb*npol ) )
ALLOCATE( w0( nz1, ngper, 5 ) ) ALLOCATE( w0( nz1, ngper, 5 ) )
ALLOCATE( cix( nz1, n2d, norb*npol ) ) ALLOCATE( cix( nz1, n2d, norb*npol ) )
ALLOCATE( dix( nz1, n2d, norb*npol ) ) ALLOCATE( dix( nz1, n2d, norb*npol ) )
ALLOCATE( ci( norb*npol, n2d, nrzp ) ) ALLOCATE( ci( norb*npol, n2d ) )
ALLOCATE( di( norb*npol, n2d, nrzp ) ) ALLOCATE( di( norb*npol, n2d ) )
ALLOCATE( inslab( norb ) ) ALLOCATE( inslab( norb ) )
ALLOCATE( ezk( n2d, nrzp ) ) ALLOCATE( zkk( n2d ) )
ALLOCATE( emzk( n2d, nrzp ) ) ALLOCATE( ezk( n2d ) )
ALLOCATE( emzk( n2d ) )
ALLOCATE( ezk1( nz1, n2d ) ) ALLOCATE( ezk1( nz1, n2d ) )
ALLOCATE( emzk1( nz1, n2d ) ) ALLOCATE( emzk1( nz1, n2d ) )
ALLOCATE( zk2( n2d, nrzp ) ) ALLOCATE( zk2( n2d ) )
ALLOCATE( amat( 2*n2d, 2*n2d ) )
ALLOCATE( xmat( 2*n2d, 2*n2d+norb*npol ) )
ALLOCATE( ipiv(2*n2d) )
ALLOCATE( f0(n2d,norb*npol) )
ALLOCATE( f1(n2d,norb*npol) )
ALLOCATE( f2(n2d,norb*npol) )
ALLOCATE( f_aux(norb*npol,n2d) )
intw1=(0.d0,0.d0) intw1=(0.d0,0.d0)
intw2=(0.d0,0.d0) intw2=(0.d0,0.d0)
do k=kin,kfin
kp = k-kin+1
do lam=1,n2d
arg=cim*tpi*zk(lam, kp)*dz
ezk(lam,kp)=exp(arg)
emzk(lam,kp)=exp(-arg)
zk2(lam,kp)=cim/(2.d0*zk(lam,kp)*tpiba)
enddo
enddo
! !
! some orbitals relations ! some orbitals relations
! !
do iorb=1, norb do iorb=1, norb
inslab(iorb) = 0 inslab(iorb) = 0
enddo enddo
@ -129,26 +123,37 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
endif endif
enddo enddo
CALL start_clock('integrals') !--- initial conditions for a_n coefficients
! xmat = 0.d0
! The loop over slabs to compute ci, di, and initial intw2 do lam = n2d+1, 2*n2d
! xmat(lam,lam) = 1.d0
enddo
!---
do k = kin, kfin do k = kin, kfin
kp = k-kin+1 kp = k-kin+1
! write(6,*) 'integrals k=', k !------
! Start of 2D Fourier components calculations and depending
! variables
!
do lam=1,n2d
arg=cim*tpi*zk(lam, kp)*dz
zkk(lam)=cim*zk(lam,kp)*tpiba
ezk(lam)=exp(arg)
emzk(lam)=exp(-arg)
zk2(lam)=cim/(2.d0*zk(lam,kp)*tpiba)
DO lam=1,n2d arg=cim*tpi*zk(lam,kp)*dz1
arg=cim*zk(lam,kp)*dz1*tpi fact=exp(arg)
fact=EXP(arg) factm=exp(-arg)
factm=EXP(-arg) ezk1(1,lam)=fact
ezk1(1,lam)=fact emzk1(1,lam)=factm
emzk1(1,lam)=factm do k1=2,nz1
DO k1=2,nz1 ezk1(k1,lam)=ezk1(k1-1,lam)*fact
ezk1(k1,lam)=ezk1(k1-1,lam)*fact emzk1(k1,lam)=emzk1(k1-1,lam)*factm
emzk1(k1,lam)=emzk1(k1-1,lam)*factm enddo
ENDDO enddo
ENDDO
if(ewind.le.100.d0) then if(ewind.le.100.d0) then
CALL ZGEMM('n', 'n', ngper*npol, n2d, n2d, one, newbg, & CALL ZGEMM('n', 'n', ngper*npol, n2d, n2d, one, newbg, &
@ -158,9 +163,11 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
psigper(:,:) = psiper(:,:,kp) psigper(:,:) = psiper(:,:,kp)
endif endif
w=(0.d0,0.d0) w = 0.d0
ci = 0.d0
di = 0.d0
DO iorb=1, norb DO iorb=1, norb
IF(cros(iorb,k).EQ.1.AND.inslab(iorb).EQ.iorb) THEN IF(cros(iorb,k).EQ.1.AND.inslab(iorb).EQ.iorb) THEN
mdim = 2*tblm(3,iorb)+1 mdim = 2*tblm(3,iorb)+1
nt = tblm(1,iorb) nt = tblm(1,iorb)
nb = tblm(2,iorb) nb = tblm(2,iorb)
@ -182,10 +189,10 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
ENDDO ENDDO
ENDDO ENDDO
ENDDO ENDDO
ENDDO ENDDO
ENDIF ENDIF
ENDDO ENDDO
ENDIF ENDIF
ENDDO ENDDO
DO iorb=1, norb*npol DO iorb=1, norb*npol
iorba=iorb iorba=iorb
@ -200,13 +207,13 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
DO iorb=1, norb*npol DO iorb=1, norb*npol
iorba=iorb iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2 IF (npol.EQ.2) iorba=(iorb+1)/2
IF (cros(iorba,k).EQ.1) THEN IF (cros(iorba,k).EQ.1) THEN
DO lam=1, n2d DO lam=1, n2d
ci(iorb,lam,kp)=int1d(w(1,lam,iorb), & ci(iorb,lam)=int1d(w(1,lam,iorb), &
zk(lam,kp),dz,dz1,nz1,tpiba,1) zk(lam,kp),dz,dz1,nz1,tpiba,1)
di(iorb,lam,kp)=int1d(w(1,lam,iorb), & di(iorb,lam)=int1d(w(1,lam,iorb), &
zk(lam,kp),dz,dz1,nz1,tpiba,-1) zk(lam,kp),dz,dz1,nz1,tpiba,-1)
ENDDO ENDDO
DO iorb1=1, norb*npol DO iorb1=1, norb*npol
iorb1a=iorb1 iorb1a=iorb1
IF (npol.EQ.2) iorb1a=(iorb1+1)/2 IF (npol.EQ.2) iorb1a=(iorb1+1)/2
@ -215,315 +222,245 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
intw2(iorb,iorb1)=intw2(iorb,iorb1)- & intw2(iorb,iorb1)=intw2(iorb,iorb1)- &
int2d(w(1,lam,iorb),w(1,lam,iorb1),cix(1,lam,iorb1), & int2d(w(1,lam,iorb),w(1,lam,iorb1),cix(1,lam,iorb1), &
dix(1,lam,iorb1),ezk1(1,lam),emzk1(1,lam), & dix(1,lam,iorb1),ezk1(1,lam),emzk1(1,lam), &
zk(lam,kp),dz1,tpiba,nz1)*zk2(lam,kp) zk(lam,kp),dz1,tpiba,nz1)*zk2(lam)
ENDDO ENDDO
ENDIF ENDIF
ENDDO ENDDO
ENDIF ENDIF
ENDDO ENDDO
ENDDO !----
CALL stop_clock('integrals')
DEALLOCATE(psigper)
DEALLOCATE(cix)
DEALLOCATE(dix)
DEALLOCATE(w)
DEALLOCATE(w0)
DEALLOCATE(inslab)
!-----------------------------------
!-----------
! computes f1 and f2
! !
! Some allocation for iterative process f1 = 0.d0
! f2 = 0.d0
ALLOCATE( bf( n2d, n2d ) )
ALLOCATE( an( n2d, n2d ) )
ALLOCATE( bn( n2d, n2d ) )
ALLOCATE( app( n2d, n2d ) )
ALLOCATE( bpp( n2d, n2d ) )
ALLOCATE( al( n2d, n2d ) )
ALLOCATE( bl( n2d, n2d ) )
ALLOCATE( af( n2d, n2d ) )
ALLOCATE( s1m( n2d, n2d ) )
ALLOCATE( s2m( n2d, n2d ) )
ALLOCATE( s3m( n2d, n2d ) )
ALLOCATE( s4m( n2d, n2d ) )
ALLOCATE( bnlf( n2d, norb*npol ) )
ALLOCATE( anln( n2d, norb*npol ) )
ALLOCATE( bnln( n2d, norb*npol ) )
ALLOCATE( anlp( n2d, norb*npol ) )
ALLOCATE( bnlp( n2d, norb*npol ) )
ALLOCATE( anll( n2d, norb*npol ) )
ALLOCATE( ff( n2d, norb*npol ) )
ALLOCATE( fl( n2d, norb*npol ) )
!
! We set up the starting values
!
app=(0.d0,0.d0)
an=(0.d0,0.d0)
bpp=(0.d0,0.d0)
bn=(0.d0,0.d0)
bf=(0.d0,0.d0)
anlp=(0.d0,0.d0)
anln=(0.d0,0.d0)
bnlp=(0.d0,0.d0)
bnln=(0.d0,0.d0)
bnlf=(0.d0,0.d0)
ff=(0.d0,0.d0)
fl=(0.d0,0.d0)
DO lam=1, n2d
bf(lam,lam)=(1.d0,0.d0)
bpp(lam,lam)=(1.d0,0.d0)
ENDDO
!
! To compute intw1, ff, fl for the first slab
!
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
IF (cros(iorba,kin).EQ.1) THEN
DO lam=1, n2d
intw1(iorb,lam)=di(iorb, lam, 1)
arg=ezk(lam, 1)
IF (ABS(AIMAG(zk(lam, 1))).LT.eps) THEN
ff(lam,iorb)=-arg*CONJG(di(iorb,lam,1))*zk2(lam,1)
fl(lam,iorb)=-arg*CONJG(ci(iorb,lam,1))*zk2(lam,1)
ELSE
ff(lam,iorb)=-CONJG(ci(iorb,lam,1))*zk2(lam,1)
fl(lam,iorb)=-CONJG(di(iorb,lam,1))*zk2(lam,1)
ENDIF
ENDDO
ENDIF
ENDDO
!------------------------------------
! The main loop over slabs
!
DO k=kin+1, kfin
kp = k-kin+1
nok = 0
DO iorb=1, norb*npol DO iorb=1, norb*npol
iorba=iorb iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2 IF (npol.EQ.2) iorba=(iorb+1)/2
tr=taunew(3,iorba)-taunew(4,iorba) IF (cros(iorba,k).EQ.1) THEN
IF (z(k)+dz.GT.tr) nok=iorb DO lam=1, n2d
ENDDO IF (ABS(AIMAG(zk(lam, kp))).LT.eps) THEN
DO lam=1, n2d f1(lam,iorb)=-ezk(lam)*CONJG(di(iorb,lam))*zk2(lam)
DO lam1=1,n2d f2(lam,iorb)=-ezk(lam)*CONJG(ci(iorb,lam))*zk2(lam)
c=ZDOTC(n2d,psiper(1,lam,kp),1,psiper(1,lam1,kp-1),1) ELSE
s1m(lam,lam1)=(zk(lam,kp)+zk(lam1,kp-1))/zk(lam,kp)*c f1(lam,iorb)=-CONJG(ci(iorb,lam))*zk2(lam)
s2m(lam,lam1)=(zk(lam,kp)-zk(lam1,kp-1))/zk(lam,kp)*c f2(lam,iorb)=-CONJG(di(iorb,lam))*zk2(lam)
c=ezk(lam1,kp-1) ENDIF
s3m(lam,lam1)=s1m(lam,lam1)*c ENDDO
s4m(lam,lam1)=s2m(lam,lam1)*c ENDIF
ENDDO
ENDDO ENDDO
!------------
CALL ZGEMM('n','n',n2d,n2d,n2d,one,s3m,n2d,app,n2d,one,an,n2d) if(kp.eq.1) then
CALL ZGEMM('n','n',n2d,n2d,n2d,one,s4m,n2d,app,n2d,one,bn,n2d) !-------
an= an+s2m ! b coeff. and fp on the left boundary
bn= bn+s1m fun0 = 0.d0
CALL ZGEMM('n','n',n2d,nok,n2d,one,s3m,n2d,anlp,n2d,one,anln,n2d) funl0 = 0.d0
CALL ZGEMM('n','n',n2d,nok,n2d,one,s1m,n2d,fl,n2d,one,anln,n2d) f0 = f1
CALL ZGEMM('n','n',n2d,nok,n2d,one,s4m,n2d,anlp,n2d,one,bnln,n2d) do n = 1, n2d
CALL ZGEMM('n','n',n2d,nok,n2d,one,s2m,n2d,fl,n2d,one,bnln,n2d) fun0(n,n) = 1.d0
an=an*0.5d0 enddo
bn=bn*0.5d0 !-----------
anln=anln*0.5d0 goto 11
bnln=bnln*0.5d0 endif
DO lam=1, n2d
DO n=1, n2d
bn(lam,n)=bn(lam,n)*emzk(lam,kp)
ENDDO
DO iorb=1, nok
bnln(lam,iorb)=bnln(lam,iorb)*emzk(lam,kp)
ENDDO
ENDDO
CALL DCOPY(2*n2d*n2d, an, 1, app, 1)
CALL DCOPY(2*n2d*n2d, bn, 1, bpp, 1)
an=(0.d0,0.d0)
bn=(0.d0,0.d0)
CALL DCOPY(2*norb*npol*n2d, anln, 1, anlp, 1)
CALL DCOPY(2*norb*npol*n2d, bnln, 1, bnlp, 1)
anln=(0.d0,0.d0)
bnln=(0.d0,0.d0)
fl=(0.d0,0.d0)
!-------
! adding nonlocal part
CALL ZGEMM('n','n',n2d,norb*npol,n2d,-one,psiper(1,1,kp), &
n2d,f1,n2d,one,funl1,n2d)
do i = 1, norb*npol
do lam = 1, n2d
f1(lam,i) = -zkk(lam)*f1(lam,i)
enddo
enddo
CALL ZGEMM('n','n',n2d,norb*npol,n2d,-one,psiper(1,1,kp), &
n2d,f1,n2d,one,fundl1,n2d)
!-------
!------
! constructs matrices
do i = 1, n2d
do j = 1, n2d
amat(i,j) = fun1(i,j)
amat(n2d+i,j) = fund1(i,j)
amat(i,n2d+j) = -psiper(i,j,kp)
amat(n2d+i,n2d+j) = amat(i,n2d+j)*zkk(j)
xmat(i,j) = psiper(i,j,kp)*ezk(j)
xmat(n2d+i,j) = amat(n2d+i,n2d+j)*ezk(j)
enddo
do j = n2d+1, 2*n2d
xmat(i,j) = -fun1(i,j)
xmat(n2d+i,j) = -fund1(i,j)
enddo
do j = 1, norb*npol
xmat(i,2*n2d+j) = -funl1(i,j)
xmat(n2d+i,2*n2d+j) = -fundl1(i,j)
enddo
enddo
!------
! Solve the system of linear equations
call ZGESV(2*n2d,2*n2d+norb*npol,amat,2*n2d,ipiv,xmat,2*n2d,info)
!-------
! rotates integrals
! !
do i = 1, norb*npol
do j = 1, n2d
f_aux(i,j) = intw1(i,j)
enddo
enddo
call ZGEMM('n','n',norb*npol,n2d,n2d,one,f_aux,norb*npol, &
xmat(1,n2d+1),2*n2d,one,intw1(1,n2d+1),norbf*npol)
call ZGEMM('n','n',norb*npol,norb*npol,n2d,one,f_aux,norb*npol,&
xmat(1,2*n2d+1),2*n2d,one,intw2,norbf*npol)
call ZGEMM('n','n',norb*npol,n2d,n2d,one,f_aux,norb*npol,xmat, &
2*n2d,zero,intw1,norbf*npol)
!--------
!-------
! rotates b coeff. on the left boundary
!
do i = 1, n2d
do j = 1, n2d
amat(i,j) = fun0(i,j)
enddo
enddo
call ZGEMM('n','n',n2d,n2d,n2d,one,amat,2*n2d,xmat, &
2*n2d,zero,fun0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,amat,2*n2d,xmat(1,n2d+1), &
2*n2d,one,fun0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,norb*npol,n2d,one,amat,2*n2d, &
xmat(1,2*n2d+1),2*n2d,one,funl0,n2d)
!---------------
11 continue
!------
! Add to the integrals
DO iorb=1, norb*npol DO iorb=1, norb*npol
iorba=iorb iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2 IF (npol.EQ.2) iorba=(iorb+1)/2
IF(cros(iorba,k).EQ.1) THEN IF(cros(iorba,k).EQ.1) THEN
DO lam=1, n2d DO n=1, n2d
arg=ezk(lam,kp) DO lam=1, n2d
DO n=1, n2d intw1(iorb,n) = intw1(iorb,n) + xmat(n2d+lam,n)*ci(iorb,lam)
intw1(iorb,n)=intw1(iorb,n)+app(lam,n)*ci(iorb,lam,kp)+ & intw1(iorb,n2d+n) = intw1(iorb,n2d+n) + &
bpp(lam,n)*di(iorb,lam,kp) xmat(n2d+lam,n2d+n)*ci(iorb,lam)
ENDDO ENDDO
IF (ABS(AIMAG(zk(lam,kp))).LT.eps) THEN intw1(iorb,n)=intw1(iorb,n)+di(iorb,n)
f1p=-arg*CONJG(di(iorb,lam,kp))*zk2(lam,kp)
fl(lam,iorb)=-arg*CONJG(ci(iorb,lam,kp))*zk2(lam,kp)
ELSE
f1p=-CONJG(ci(iorb,lam,kp))*zk2(lam,kp)
fl(lam,iorb)=-CONJG(di(iorb,lam,kp))*zk2(lam,kp)
ENDIF
bnlp(lam,iorb)=bnlp(lam,iorb)-f1p*emzk(lam,kp)
ENDDO ENDDO
ENDIF
ENDDO
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
IF(cros(iorba,k).EQ.1) THEN
DO iorb1=1, norb*npol DO iorb1=1, norb*npol
iorb1a=iorb1 iorb1a=iorb1
IF (npol.EQ.2) iorb1a=(iorb1+1)/2 IF (npol.EQ.2) iorb1a=(iorb1+1)/2
tr=taunew(3,iorb1a)-taunew(4,iorb1a) tr=taunew(3,iorb1a)-taunew(4,iorb1a)
IF (z(k)+dz.GT.tr) THEN IF (z(k)+dz.GT.tr) THEN
c=(0.d0, 0.d0) c=(0.d0, 0.d0)
DO lam=1, n2d DO lam=1, n2d
c=c+anlp(lam,iorb1)*ci(iorb,lam,kp)+ & c=c+xmat(n2d+lam,2*n2d+iorb1)*ci(iorb,lam)
bnlp(lam,iorb1)*di(iorb,lam,kp)
ENDDO ENDDO
intw2(iorb,iorb1)=intw2(iorb,iorb1)+c intw2(iorb,iorb1)=intw2(iorb,iorb1)+c
ENDIF ENDIF
ENDDO ENDDO
ENDIF ENDIF
ENDDO ENDDO
! !---------------
! Rotation of linear solutions
!
CALL rotatef(app, bpp, bf, anlp, bnlp, bnlf, intw1, intw2, &
n2d, norbf, norb, npol)
! write(6,*) 'done k', k
ENDDO !-------
! wave functions on the right boundary
do i = 1, n2d
do j = 1, 2*n2d
amat(i,j) = xmat(n2d+i,j)*ezk(i)
amat(n2d+i,j) = amat(i,j)*zkk(i)
enddo
amat(i,i) = amat(i,i) + 1.d0
amat(n2d+i,i) = amat(n2d+i,i) - zkk(i)
do j = 1, norb*npol
f1(i,j)=xmat(n2d+i,2*n2d+j)*ezk(i)+f2(i,j)
f2(i,j)=f1(i,j)*zkk(i)
enddo
enddo
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,kp), &
n2d,amat,2*n2d,zero,fun1,n2d)
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,kp), &
n2d,amat(n2d+1,1),2*n2d,zero,fund1,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,kp), &
n2d,f1,n2d,zero,funl1,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,kp), &
n2d,f2,n2d,zero,fundl1,n2d)
!---------
!--------------------------------------------- enddo
CALL DCOPY(2*n2d*n2d, app, 1, al, 1) !-------
! ! wave functions on the left boundary
! To compute the 2nd half of linear solutions do lam = 1, n2d
! arg = cim*tpi*zk(lam,1)*dz
CALL scatter_back(psiper, zk, app, bpp, an, bn, af, ci, di, & zkk(lam) = cim*zk(lam,1)*tpiba
ezk, emzk, s1m, s2m, s3m, s4m, kin, kfin, & ezk(lam) = exp(arg)
nrz, nrzp, norb, cros, dz) enddo
CALL DCOPY(2*n2d*n2d, bpp, 1, bl, 1) do i = 1, n2d
CALL DCOPY(2*n2d*norb*npol, anlp, 1, anll, 1) do j = 1, 2*n2d
amat(i,j) = fun0(i,j)*ezk(i)
amat(n2d+i,j) = -amat(i,j)*zkk(i)
enddo
amat(i,n2d+i) = amat(i,n2d+i) + 1.d0
amat(n2d+i,n2d+i) = amat(n2d+i,n2d+i) + zkk(i)
do j = 1, norb*npol
f1(i,j) = funl0(i,j)*ezk(i)+f0(i,j)
f2(i,j) = -f1(i,j)*zkk(i)
enddo
enddo
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,1), &
n2d,amat,2*n2d,zero,fun0,n2d)
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,1), &
n2d,amat(n2d+1,1),2*n2d,zero,fund0,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,1), &
n2d,f1,n2d,zero,funl0,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,1), &
n2d,f2,n2d,zero,fundl0,n2d)
!---------
! scaling the integrals
CALL DSCAL(2*norbf*npol*2*n2d, sarea, intw1, 1) CALL DSCAL(2*norbf*npol*2*n2d, sarea, intw1, 1)
CALL DSCAL(2*norbf*npol*norbf*npol, sarea, intw2, 1) CALL DSCAL(2*norbf*npol*norbf*npol, sarea, intw2, 1)
!
! To construct functions and derivetives on the boundaries
!
! local solutions
ALLOCATE( s5m( n2d, n2d ) )
ALLOCATE( s6m( n2d, n2d ) )
ALLOCATE( s7m( n2d, n2d ) )
ALLOCATE( s8m( n2d, n2d ) )
! fun0=(0.d0,0.d0)
! fun1=(0.d0,0.d0)
! fund0=(0.d0,0.d0)
! fund1=(0.d0,0.d0)
k = 1
kp = nrzp
do n=1, n2d
do lam=1, n2d
s1m(lam,n)=bf(lam,n)*ezk(lam,k)
s2m(lam,n)=al(lam,n)*ezk(lam,kp)
if (lam.eq.n) s2m(lam,n)=s2m(lam,n)+(1.d0,0.d0)
s3m(lam,n)=-cim*zk(lam,k)*s1m(lam,n)*tpiba
s4m(lam,n)=cim*zk(lam,kp)*s2m(lam,n)*tpiba
if (lam.eq.n) s4m(lam,n)=s4m(lam,n)-2.d0*cim*zk(lam,kp)*tpiba
s5m(lam,n)=bl(lam,n)*ezk(lam,k)
if (lam.eq.n) s5m(lam,n)=s5m(lam,n)+(1.d0,0.d0)
s6m(lam,n)=af(lam,n)*ezk(lam,kp)
s7m(lam,n)=-cim*zk(lam,k)*s5m(lam,n)*tpiba
if (lam.eq.n) s7m(lam,n)=s7m(lam,n)+2.d0*cim*zk(lam,k)*tpiba
s8m(lam,n)=cim*zk(lam,kp)*s6m(lam,n)*tpiba
enddo
enddo
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s1m,n2d,zero,fun0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s2m,n2d,zero,fun1,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s3m,n2d,zero,fund0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s4m,n2d,zero,fund1,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s5m,n2d,zero, &
fun0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s6m,n2d,zero, &
fun1(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s7m,n2d,zero, &
fund0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s8m,n2d,zero, &
fund1(1,n2d+1),n2d)
! nonlocal solutions
funl0=(0.d0,0.d0)
funl1=(0.d0,0.d0)
fundl0=(0.d0,0.d0)
fundl1=(0.d0,0.d0)
do iorb = 1, norb*npol
do lam = 1, n2d
s1=ff(lam,iorb)+bnlf(lam,iorb)*ezk(lam,k)
s2=fl(lam,iorb)+anll(lam,iorb)*ezk(lam,kp)
s3=-cim*zk(lam,k)*s1*tpiba
s4=cim*zk(lam,kp)*s2*tpiba
do ig=1, n2d
funl0(ig,iorb)=funl0(ig,iorb)+psiper(ig,lam,k)*s1
funl1(ig,iorb)=funl1(ig,iorb)+psiper(ig,lam,kp)*s2
fundl0(ig,iorb)=fundl0(ig,iorb)+psiper(ig,lam,k)*s3
fundl1(ig,iorb)=fundl1(ig,iorb)+psiper(ig,lam,kp)*s4
enddo
enddo
enddo
! !
! To construct the functions in the whole rigion zin<z<zfin in the ! To construct the functions in the whole rigion zin<z<zfin in the
! case of multiparallel running ! case of multiparallel running
! !
#ifdef __PARA #ifdef __PARA
CALL rotproc(fun0, fund0, fun1, fund1, funl0, fundl0, funl1, & CALL rotproc(fun0, fund0, fun1, fund1, funl0, fundl0, funl1, &
fundl1, intw1, intw2, n2d, norbf, norb) fundl1, intw1, intw2, n2d, norbf, norb)
#endif #endif
deallocate( psigper )
deallocate( w )
deallocate( w0 )
deallocate( cix )
deallocate( dix )
deallocate( ci )
deallocate( di )
deallocate( inslab )
deallocate( zkk )
deallocate( ezk )
deallocate( emzk )
deallocate( ezk1 )
deallocate( emzk1 )
deallocate( zk2 )
deallocate( amat )
deallocate( xmat )
deallocate( ipiv )
deallocate( f0 )
deallocate( f1 )
deallocate( f2 )
deallocate( f_aux )
CALL stop_clock('scatter_forw') CALL stop_clock('scatter_forw')
DEALLOCATE(ci) return
DEALLOCATE(di)
DEALLOCATE(bf)
DEALLOCATE(an)
DEALLOCATE(bn)
DEALLOCATE(app)
DEALLOCATE(bpp)
DEALLOCATE(al)
DEALLOCATE(bl)
DEALLOCATE(af)
DEALLOCATE(bnlf)
DEALLOCATE(anln)
DEALLOCATE(bnln)
DEALLOCATE(anlp)
DEALLOCATE(bnlp)
DEALLOCATE(anll)
DEALLOCATE(ff)
DEALLOCATE(fl)
DEALLOCATE(s1m)
DEALLOCATE(s2m)
DEALLOCATE(s3m)
DEALLOCATE(s4m)
DEALLOCATE(s5m)
DEALLOCATE(s6m)
DEALLOCATE(s7m)
DEALLOCATE(s8m)
DEALLOCATE(ezk)
DEALLOCATE(emzk)
DEALLOCATE(ezk1)
DEALLOCATE(emzk1)
DEALLOCATE(zk2)
RETURN
END SUBROUTINE scatter_forw END SUBROUTINE scatter_forw