Some optimization (A. Smogunov)

git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@3449 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
smogunov 2006-10-11 09:31:53 +00:00
parent a806c4703e
commit aad16258e6
3 changed files with 265 additions and 501 deletions

View File

@ -32,7 +32,6 @@ local.o \
local_set.o \
poten.o \
print_clock_pwcond.o \
rotate.o \
rotproc.o \
save_cond.o \
scatter_back.o \

View File

@ -1,172 +0,0 @@
!
! Copyright (C) 2003 A. Smogunov
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
!
subroutine rotatef(app, bpp, bf, anlp, bnlp, bnlf, intw1, intw2, &
n2d, norbf, norbnow, npol)
!
! This subroutine makes a linear combination of the solutions
! in such a way that bpp at this slab becomes a delta symbol.
! It works for forward iterative process.
!
#include "f_defs.h"
USE kinds, only : DP
implicit none
integer :: norbf, n2d, norbnow, lam, n, n1, iorb, iorb1, npol, info
integer, allocatable :: ipiv(:)
complex(DP) :: &
app(n2d, n2d), & ! coeff. of local functions
bpp(n2d, n2d), & ! --
bf(n2d, n2d), & ! --
anlp(n2d, norbnow*npol), & ! coeff. of nonloc. functions
bnlp(n2d, norbnow*npol), & ! --
bnlf(n2d, norbnow*npol), & ! --
intw1(norbf*npol, 2*n2d), & ! integral of loc. fun.
intw2(norbf*npol, norbf*npol) ! integral of nonloc. fun.
complex(DP), allocatable :: h(:,:), aux(:,:)
complex(DP), parameter :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
call start_clock('rotatef')
allocate( h( n2d, n2d ) )
allocate( ipiv( n2d ) )
!
! To find the needed matrix h of the linear transformation
!
h=(0.d0,0.d0)
do lam=1, n2d
h(lam,lam)=(1.d0,0.d0)
enddo
call ZGESV(n2d,n2d,bpp,n2d,ipiv,h,n2d,info)
!
! To rotate app, bf, bpp
!
allocate( aux( n2d, n2d ) )
call ZGEMM('n','n',n2d,n2d,n2d,one,app,n2d,h,n2d,zero,aux,n2d)
app=aux
call ZGEMM('n','n',n2d,n2d,n2d,one,bf,n2d,h,n2d,zero,aux,n2d)
bf=aux
bpp=(0.d0,0.d0)
do lam=1, n2d
bpp(lam,lam)=(1.d0,0.d0)
enddo
deallocate(aux)
!
! To recalculate intw1 with new functions
!
if (norbnow==0) goto 100
allocate( aux( norbf*npol, n2d ) )
call ZGEMM('n','n',norbnow*npol,n2d,n2d,one,intw1,norbf*npol,h,n2d,zero,&
aux,norbf*npol)
do iorb=1,norbnow*npol
do n=1,n2d
intw1(iorb,n)=aux(iorb,n)
enddo
enddo
deallocate(aux)
!
! To reobtain nonlinear functions and the integrals
! intw2 on them.
!
call ZGEMM('n','n',n2d,norbnow*npol,n2d,-one,app,n2d,bnlp,n2d,one,&
anlp,n2d)
call ZGEMM('n','n',n2d,norbnow*npol,n2d,-one,bf,n2d,bnlp,n2d,one,&
bnlf,n2d)
call ZGEMM('n','n',norbnow*npol,norbnow*npol,n2d,-one,intw1,norbf*npol, &
bnlp,n2d,one,intw2,norbf*npol)
bnlp=(0.d0,0.d0)
100 continue
deallocate(h)
deallocate(ipiv)
call stop_clock('rotatef')
return
end subroutine rotatef
!------------------------------------------
subroutine rotateb (app, bpp, af, intw1, n2d, norbf, norbnow, npol)
!
! This subroutine makes a linear combination of the solutions
! in such a way that app at this slab becomes a delta symbol.
! It works for backward iterative process.
!
#include "f_defs.h"
USE kinds, only : DP
implicit none
integer :: norbf, n2d, norbnow, lam, n, n1, iorb, npol, info
integer, allocatable :: ipiv(:)
complex(DP) :: app(n2d,n2d), af(n2d,n2d), bpp(n2d,n2d), &
intw1(norbf*npol,2*n2d)
complex(DP), allocatable :: h(:,:), aux(:,:), aux1(:,:)
complex(DP), parameter :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
call start_clock('rotateb')
allocate( h( n2d, n2d ) )
allocate( ipiv( n2d ) )
!
! To find the needed matrix h of the linear transformation
!
h=(0.d0,0.d0)
do lam=1, n2d
h(lam,lam)=(1.d0,0.d0)
enddo
call ZGESV(n2d, n2d, app, n2d, ipiv, h, n2d, info)
!
! To rotate app, bpp, af
!
allocate( aux( n2d, n2d ) )
call ZGEMM('n','n',n2d,n2d,n2d,one,bpp,n2d,h,n2d,zero,aux,n2d)
bpp=aux
call ZGEMM('n','n',n2d,n2d,n2d,one,af,n2d,h,n2d,zero,aux,n2d)
af=aux
app=(0.d0,0.d0)
do lam=1, n2d
app(lam,lam)=(1.d0,0.d0)
enddo
deallocate(aux)
!
! To recalculate intw1 with new functions
!
if (norbnow==0) goto 100
allocate( aux( norbf*npol, n2d ) )
allocate( aux1( norbf*npol, n2d ) )
do iorb=1,norbnow*npol
do n=1,n2d
aux1(iorb,n)= intw1(iorb,n2d+n)
enddo
enddo
call ZGEMM('n','n',norbnow*npol,n2d,n2d,one,aux1,norbf*npol, &
h,n2d,zero,aux,norbf*npol)
do iorb=1,norbnow*npol
do n=1,n2d
intw1(iorb,n2d+n)= aux(iorb,n)
enddo
enddo
deallocate(aux)
deallocate(aux1)
100 continue
deallocate(h)
deallocate(ipiv)
call stop_clock('rotateb')
return
end subroutine rotateb

View File

@ -7,6 +7,7 @@
!
! Optimized Aug. 2004 (ADC)
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
! Optimized Oct. 2006 (A. Smogunov)
!
#include "f_defs.h"
!
@ -34,8 +35,8 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
tblm(4,norb), &
k, kz, n, lam, ig, lam1, mdim, itt, nbb, iorb, iorb1, &
iorba, iorb1a, is, kp, nok, k1, nt, nb, kin, kfin
INTEGER :: info
INTEGER, ALLOCATABLE :: inslab(:)
INTEGER :: i, j, info
INTEGER, ALLOCATABLE :: ipiv(:), inslab(:)
real(DP) :: z(nrz+1), r(1:ndmx,npsx), rab(1:ndmx,npsx), &
betar(1:ndmx,nbrx,npsx), taunew(4,norb)
REAL(DP), PARAMETER :: eps=1.d-8
@ -47,23 +48,22 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
zk(n2d,nrzp)
COMPLEX(DP), ALLOCATABLE :: &
psigper(:,:), & ! psigper(g,lam)=newbg(g,lam1) psiper(lam1,lam)
w0(:,:,:), & ! w0(z,g,m) are 2D Fourier components (see four.f)
w(:,:,:), & ! w(z,lam,m)=psigper(g,lam)^* \exp{-igr^m_perp}
w0(:,:,:), & ! w0(z,g,m) are 2D Fourier components (see four.f)
w(:,:,:), & ! w(z,lam,m)=psigper(g,lam)^* \exp{-igr^m_perp}
! w0(z,g,m)
ci(:,:,:), & ! ci(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
hmat(:,:), & ! ci(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
! w(z,lam,m)^*\exp{izk(lam,k)(z-z(k))}
di(:,:,:), & ! di(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
amat(:,:), & ! di(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
! w(z,lam,m)^*\exp{izk(lam,k)(z(k+1)-z)}
cix(:,:,:), & !
dix(:,:,:), & !
bf(:,:), an(:,:), bn(:,:), &
app(:,:), bpp(:,:), al(:,:), &
bl(:,:), af(:,:), &
bnlf(:,:), anln(:,:), bnln(:,:), &
anlp(:,:), bnlp(:,:), anll(:,:), &
ff(:,:), fl(:,:), ezk(:,:), emzk(:,:), zk2(:,:), s1m(:,:), s2m(:,:), &
s3m(:,:), s4m(:,:), s5m(:,:), s6m(:,:), s7m(:,:), s8m(:,:), &
ezk1(:,:), emzk1(:,:)
xmat(:,:), & !
ci(:,:), & ! ci(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
! w(z,lam,m)^*\exp{izk(lam,k)(z-z(k))}
di(:,:), & ! di(m,lam,k)=\int_{z(k)}^{z(k+1)} dz
! w(z,lam,m)^*\exp{izk(lam,k)(z(k+1)-z)}
cix(:,:,:), & !
dix(:,:,:), & !
f0(:,:), f1(:,:), f2(:,:), f_aux(:,:), &
zkk(:), ezk(:), emzk(:), zk2(:), ezk1(:,:), emzk1(:,:)
CALL start_clock('scatter_forw')
@ -77,37 +77,31 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
!
call divide(nrz,kin,kfin)
!------------------------
! Start of 2D Fourier components calculations and depending
! variables
!
ALLOCATE( psigper( ngper*npol, n2d ) )
ALLOCATE( w( nz1, n2d, norb*npol ) )
ALLOCATE( w0( nz1, ngper, 5 ) )
ALLOCATE( cix( nz1, n2d, norb*npol ) )
ALLOCATE( dix( nz1, n2d, norb*npol ) )
ALLOCATE( ci( norb*npol, n2d, nrzp ) )
ALLOCATE( di( norb*npol, n2d, nrzp ) )
ALLOCATE( ci( norb*npol, n2d ) )
ALLOCATE( di( norb*npol, n2d ) )
ALLOCATE( inslab( norb ) )
ALLOCATE( ezk( n2d, nrzp ) )
ALLOCATE( emzk( n2d, nrzp ) )
ALLOCATE( zkk( n2d ) )
ALLOCATE( ezk( n2d ) )
ALLOCATE( emzk( n2d ) )
ALLOCATE( ezk1( nz1, n2d ) )
ALLOCATE( emzk1( nz1, n2d ) )
ALLOCATE( zk2( n2d, nrzp ) )
ALLOCATE( zk2( n2d ) )
ALLOCATE( amat( 2*n2d, 2*n2d ) )
ALLOCATE( xmat( 2*n2d, 2*n2d+norb*npol ) )
ALLOCATE( ipiv(2*n2d) )
ALLOCATE( f0(n2d,norb*npol) )
ALLOCATE( f1(n2d,norb*npol) )
ALLOCATE( f2(n2d,norb*npol) )
ALLOCATE( f_aux(norb*npol,n2d) )
intw1=(0.d0,0.d0)
intw2=(0.d0,0.d0)
do k=kin,kfin
kp = k-kin+1
do lam=1,n2d
arg=cim*tpi*zk(lam, kp)*dz
ezk(lam,kp)=exp(arg)
emzk(lam,kp)=exp(-arg)
zk2(lam,kp)=cim/(2.d0*zk(lam,kp)*tpiba)
enddo
enddo
!
! some orbitals relations
!
@ -129,26 +123,37 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
endif
enddo
CALL start_clock('integrals')
!
! The loop over slabs to compute ci, di, and initial intw2
!
!--- initial conditions for a_n coefficients
xmat = 0.d0
do lam = n2d+1, 2*n2d
xmat(lam,lam) = 1.d0
enddo
!---
do k = kin, kfin
kp = k-kin+1
! write(6,*) 'integrals k=', k
!------
! Start of 2D Fourier components calculations and depending
! variables
!
do lam=1,n2d
arg=cim*tpi*zk(lam, kp)*dz
zkk(lam)=cim*zk(lam,kp)*tpiba
ezk(lam)=exp(arg)
emzk(lam)=exp(-arg)
zk2(lam)=cim/(2.d0*zk(lam,kp)*tpiba)
DO lam=1,n2d
arg=cim*zk(lam,kp)*dz1*tpi
fact=EXP(arg)
factm=EXP(-arg)
ezk1(1,lam)=fact
emzk1(1,lam)=factm
DO k1=2,nz1
ezk1(k1,lam)=ezk1(k1-1,lam)*fact
emzk1(k1,lam)=emzk1(k1-1,lam)*factm
ENDDO
ENDDO
arg=cim*tpi*zk(lam,kp)*dz1
fact=exp(arg)
factm=exp(-arg)
ezk1(1,lam)=fact
emzk1(1,lam)=factm
do k1=2,nz1
ezk1(k1,lam)=ezk1(k1-1,lam)*fact
emzk1(k1,lam)=emzk1(k1-1,lam)*factm
enddo
enddo
if(ewind.le.100.d0) then
CALL ZGEMM('n', 'n', ngper*npol, n2d, n2d, one, newbg, &
@ -158,7 +163,9 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
psigper(:,:) = psiper(:,:,kp)
endif
w=(0.d0,0.d0)
w = 0.d0
ci = 0.d0
di = 0.d0
DO iorb=1, norb
IF(cros(iorb,k).EQ.1.AND.inslab(iorb).EQ.iorb) THEN
mdim = 2*tblm(3,iorb)+1
@ -202,9 +209,9 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
IF (npol.EQ.2) iorba=(iorb+1)/2
IF (cros(iorba,k).EQ.1) THEN
DO lam=1, n2d
ci(iorb,lam,kp)=int1d(w(1,lam,iorb), &
ci(iorb,lam)=int1d(w(1,lam,iorb), &
zk(lam,kp),dz,dz1,nz1,tpiba,1)
di(iorb,lam,kp)=int1d(w(1,lam,iorb), &
di(iorb,lam)=int1d(w(1,lam,iorb), &
zk(lam,kp),dz,dz1,nz1,tpiba,-1)
ENDDO
DO iorb1=1, norb*npol
@ -215,171 +222,135 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
intw2(iorb,iorb1)=intw2(iorb,iorb1)- &
int2d(w(1,lam,iorb),w(1,lam,iorb1),cix(1,lam,iorb1), &
dix(1,lam,iorb1),ezk1(1,lam),emzk1(1,lam), &
zk(lam,kp),dz1,tpiba,nz1)*zk2(lam,kp)
zk(lam,kp),dz1,tpiba,nz1)*zk2(lam)
ENDDO
ENDIF
ENDDO
ENDIF
ENDDO
ENDDO
CALL stop_clock('integrals')
DEALLOCATE(psigper)
DEALLOCATE(cix)
DEALLOCATE(dix)
DEALLOCATE(w)
DEALLOCATE(w0)
DEALLOCATE(inslab)
!-----------------------------------
!----
!-----------
! computes f1 and f2
!
! Some allocation for iterative process
!
ALLOCATE( bf( n2d, n2d ) )
ALLOCATE( an( n2d, n2d ) )
ALLOCATE( bn( n2d, n2d ) )
ALLOCATE( app( n2d, n2d ) )
ALLOCATE( bpp( n2d, n2d ) )
ALLOCATE( al( n2d, n2d ) )
ALLOCATE( bl( n2d, n2d ) )
ALLOCATE( af( n2d, n2d ) )
ALLOCATE( s1m( n2d, n2d ) )
ALLOCATE( s2m( n2d, n2d ) )
ALLOCATE( s3m( n2d, n2d ) )
ALLOCATE( s4m( n2d, n2d ) )
ALLOCATE( bnlf( n2d, norb*npol ) )
ALLOCATE( anln( n2d, norb*npol ) )
ALLOCATE( bnln( n2d, norb*npol ) )
ALLOCATE( anlp( n2d, norb*npol ) )
ALLOCATE( bnlp( n2d, norb*npol ) )
ALLOCATE( anll( n2d, norb*npol ) )
ALLOCATE( ff( n2d, norb*npol ) )
ALLOCATE( fl( n2d, norb*npol ) )
!
! We set up the starting values
!
app=(0.d0,0.d0)
an=(0.d0,0.d0)
bpp=(0.d0,0.d0)
bn=(0.d0,0.d0)
bf=(0.d0,0.d0)
anlp=(0.d0,0.d0)
anln=(0.d0,0.d0)
bnlp=(0.d0,0.d0)
bnln=(0.d0,0.d0)
bnlf=(0.d0,0.d0)
ff=(0.d0,0.d0)
fl=(0.d0,0.d0)
DO lam=1, n2d
bf(lam,lam)=(1.d0,0.d0)
bpp(lam,lam)=(1.d0,0.d0)
ENDDO
!
! To compute intw1, ff, fl for the first slab
!
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
IF (cros(iorba,kin).EQ.1) THEN
DO lam=1, n2d
intw1(iorb,lam)=di(iorb, lam, 1)
arg=ezk(lam, 1)
IF (ABS(AIMAG(zk(lam, 1))).LT.eps) THEN
ff(lam,iorb)=-arg*CONJG(di(iorb,lam,1))*zk2(lam,1)
fl(lam,iorb)=-arg*CONJG(ci(iorb,lam,1))*zk2(lam,1)
f1 = 0.d0
f2 = 0.d0
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
IF (cros(iorba,k).EQ.1) THEN
DO lam=1, n2d
IF (ABS(AIMAG(zk(lam, kp))).LT.eps) THEN
f1(lam,iorb)=-ezk(lam)*CONJG(di(iorb,lam))*zk2(lam)
f2(lam,iorb)=-ezk(lam)*CONJG(ci(iorb,lam))*zk2(lam)
ELSE
ff(lam,iorb)=-CONJG(ci(iorb,lam,1))*zk2(lam,1)
fl(lam,iorb)=-CONJG(di(iorb,lam,1))*zk2(lam,1)
f1(lam,iorb)=-CONJG(ci(iorb,lam))*zk2(lam)
f2(lam,iorb)=-CONJG(di(iorb,lam))*zk2(lam)
ENDIF
ENDDO
ENDIF
ENDDO
ENDDO
ENDIF
ENDDO
!------------
!------------------------------------
! The main loop over slabs
if(kp.eq.1) then
!-------
! b coeff. and fp on the left boundary
fun0 = 0.d0
funl0 = 0.d0
f0 = f1
do n = 1, n2d
fun0(n,n) = 1.d0
enddo
!-----------
goto 11
endif
!-------
! adding nonlocal part
CALL ZGEMM('n','n',n2d,norb*npol,n2d,-one,psiper(1,1,kp), &
n2d,f1,n2d,one,funl1,n2d)
do i = 1, norb*npol
do lam = 1, n2d
f1(lam,i) = -zkk(lam)*f1(lam,i)
enddo
enddo
CALL ZGEMM('n','n',n2d,norb*npol,n2d,-one,psiper(1,1,kp), &
n2d,f1,n2d,one,fundl1,n2d)
!-------
!------
! constructs matrices
do i = 1, n2d
do j = 1, n2d
amat(i,j) = fun1(i,j)
amat(n2d+i,j) = fund1(i,j)
amat(i,n2d+j) = -psiper(i,j,kp)
amat(n2d+i,n2d+j) = amat(i,n2d+j)*zkk(j)
xmat(i,j) = psiper(i,j,kp)*ezk(j)
xmat(n2d+i,j) = amat(n2d+i,n2d+j)*ezk(j)
enddo
do j = n2d+1, 2*n2d
xmat(i,j) = -fun1(i,j)
xmat(n2d+i,j) = -fund1(i,j)
enddo
do j = 1, norb*npol
xmat(i,2*n2d+j) = -funl1(i,j)
xmat(n2d+i,2*n2d+j) = -fundl1(i,j)
enddo
enddo
!------
! Solve the system of linear equations
call ZGESV(2*n2d,2*n2d+norb*npol,amat,2*n2d,ipiv,xmat,2*n2d,info)
!-------
! rotates integrals
!
DO k=kin+1, kfin
kp = k-kin+1
nok = 0
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
tr=taunew(3,iorba)-taunew(4,iorba)
IF (z(k)+dz.GT.tr) nok=iorb
ENDDO
DO lam=1, n2d
DO lam1=1,n2d
c=ZDOTC(n2d,psiper(1,lam,kp),1,psiper(1,lam1,kp-1),1)
s1m(lam,lam1)=(zk(lam,kp)+zk(lam1,kp-1))/zk(lam,kp)*c
s2m(lam,lam1)=(zk(lam,kp)-zk(lam1,kp-1))/zk(lam,kp)*c
c=ezk(lam1,kp-1)
s3m(lam,lam1)=s1m(lam,lam1)*c
s4m(lam,lam1)=s2m(lam,lam1)*c
ENDDO
ENDDO
CALL ZGEMM('n','n',n2d,n2d,n2d,one,s3m,n2d,app,n2d,one,an,n2d)
CALL ZGEMM('n','n',n2d,n2d,n2d,one,s4m,n2d,app,n2d,one,bn,n2d)
an= an+s2m
bn= bn+s1m
CALL ZGEMM('n','n',n2d,nok,n2d,one,s3m,n2d,anlp,n2d,one,anln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s1m,n2d,fl,n2d,one,anln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s4m,n2d,anlp,n2d,one,bnln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s2m,n2d,fl,n2d,one,bnln,n2d)
an=an*0.5d0
bn=bn*0.5d0
anln=anln*0.5d0
bnln=bnln*0.5d0
DO lam=1, n2d
DO n=1, n2d
bn(lam,n)=bn(lam,n)*emzk(lam,kp)
ENDDO
DO iorb=1, nok
bnln(lam,iorb)=bnln(lam,iorb)*emzk(lam,kp)
ENDDO
ENDDO
CALL DCOPY(2*n2d*n2d, an, 1, app, 1)
CALL DCOPY(2*n2d*n2d, bn, 1, bpp, 1)
an=(0.d0,0.d0)
bn=(0.d0,0.d0)
CALL DCOPY(2*norb*npol*n2d, anln, 1, anlp, 1)
CALL DCOPY(2*norb*npol*n2d, bnln, 1, bnlp, 1)
anln=(0.d0,0.d0)
bnln=(0.d0,0.d0)
fl=(0.d0,0.d0)
do i = 1, norb*npol
do j = 1, n2d
f_aux(i,j) = intw1(i,j)
enddo
enddo
call ZGEMM('n','n',norb*npol,n2d,n2d,one,f_aux,norb*npol, &
xmat(1,n2d+1),2*n2d,one,intw1(1,n2d+1),norbf*npol)
call ZGEMM('n','n',norb*npol,norb*npol,n2d,one,f_aux,norb*npol,&
xmat(1,2*n2d+1),2*n2d,one,intw2,norbf*npol)
call ZGEMM('n','n',norb*npol,n2d,n2d,one,f_aux,norb*npol,xmat, &
2*n2d,zero,intw1,norbf*npol)
!--------
!-------
! rotates b coeff. on the left boundary
!
do i = 1, n2d
do j = 1, n2d
amat(i,j) = fun0(i,j)
enddo
enddo
call ZGEMM('n','n',n2d,n2d,n2d,one,amat,2*n2d,xmat, &
2*n2d,zero,fun0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,amat,2*n2d,xmat(1,n2d+1), &
2*n2d,one,fun0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,norb*npol,n2d,one,amat,2*n2d, &
xmat(1,2*n2d+1),2*n2d,one,funl0,n2d)
!---------------
11 continue
!------
! Add to the integrals
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
IF(cros(iorba,k).EQ.1) THEN
DO lam=1, n2d
arg=ezk(lam,kp)
DO n=1, n2d
intw1(iorb,n)=intw1(iorb,n)+app(lam,n)*ci(iorb,lam,kp)+ &
bpp(lam,n)*di(iorb,lam,kp)
DO n=1, n2d
DO lam=1, n2d
intw1(iorb,n) = intw1(iorb,n) + xmat(n2d+lam,n)*ci(iorb,lam)
intw1(iorb,n2d+n) = intw1(iorb,n2d+n) + &
xmat(n2d+lam,n2d+n)*ci(iorb,lam)
ENDDO
IF (ABS(AIMAG(zk(lam,kp))).LT.eps) THEN
f1p=-arg*CONJG(di(iorb,lam,kp))*zk2(lam,kp)
fl(lam,iorb)=-arg*CONJG(ci(iorb,lam,kp))*zk2(lam,kp)
ELSE
f1p=-CONJG(ci(iorb,lam,kp))*zk2(lam,kp)
fl(lam,iorb)=-CONJG(di(iorb,lam,kp))*zk2(lam,kp)
ENDIF
bnlp(lam,iorb)=bnlp(lam,iorb)-f1p*emzk(lam,kp)
intw1(iorb,n)=intw1(iorb,n)+di(iorb,n)
ENDDO
ENDIF
ENDDO
DO iorb=1, norb*npol
iorba=iorb
IF (npol.EQ.2) iorba=(iorb+1)/2
IF(cros(iorba,k).EQ.1) THEN
DO iorb1=1, norb*npol
iorb1a=iorb1
IF (npol.EQ.2) iorb1a=(iorb1+1)/2
@ -387,102 +358,76 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
IF (z(k)+dz.GT.tr) THEN
c=(0.d0, 0.d0)
DO lam=1, n2d
c=c+anlp(lam,iorb1)*ci(iorb,lam,kp)+ &
bnlp(lam,iorb1)*di(iorb,lam,kp)
c=c+xmat(n2d+lam,2*n2d+iorb1)*ci(iorb,lam)
ENDDO
intw2(iorb,iorb1)=intw2(iorb,iorb1)+c
ENDIF
ENDDO
ENDIF
ENDDO
!
! Rotation of linear solutions
!
!---------------
CALL rotatef(app, bpp, bf, anlp, bnlp, bnlf, intw1, intw2, &
n2d, norbf, norb, npol)
! write(6,*) 'done k', k
ENDDO
!-------
! wave functions on the right boundary
do i = 1, n2d
do j = 1, 2*n2d
amat(i,j) = xmat(n2d+i,j)*ezk(i)
amat(n2d+i,j) = amat(i,j)*zkk(i)
enddo
amat(i,i) = amat(i,i) + 1.d0
amat(n2d+i,i) = amat(n2d+i,i) - zkk(i)
do j = 1, norb*npol
f1(i,j)=xmat(n2d+i,2*n2d+j)*ezk(i)+f2(i,j)
f2(i,j)=f1(i,j)*zkk(i)
enddo
enddo
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,kp), &
n2d,amat,2*n2d,zero,fun1,n2d)
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,kp), &
n2d,amat(n2d+1,1),2*n2d,zero,fund1,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,kp), &
n2d,f1,n2d,zero,funl1,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,kp), &
n2d,f2,n2d,zero,fundl1,n2d)
!---------
!---------------------------------------------
enddo
CALL DCOPY(2*n2d*n2d, app, 1, al, 1)
!
! To compute the 2nd half of linear solutions
!
CALL scatter_back(psiper, zk, app, bpp, an, bn, af, ci, di, &
ezk, emzk, s1m, s2m, s3m, s4m, kin, kfin, &
nrz, nrzp, norb, cros, dz)
!-------
! wave functions on the left boundary
do lam = 1, n2d
arg = cim*tpi*zk(lam,1)*dz
zkk(lam) = cim*zk(lam,1)*tpiba
ezk(lam) = exp(arg)
enddo
CALL DCOPY(2*n2d*n2d, bpp, 1, bl, 1)
CALL DCOPY(2*n2d*norb*npol, anlp, 1, anll, 1)
do i = 1, n2d
do j = 1, 2*n2d
amat(i,j) = fun0(i,j)*ezk(i)
amat(n2d+i,j) = -amat(i,j)*zkk(i)
enddo
amat(i,n2d+i) = amat(i,n2d+i) + 1.d0
amat(n2d+i,n2d+i) = amat(n2d+i,n2d+i) + zkk(i)
do j = 1, norb*npol
f1(i,j) = funl0(i,j)*ezk(i)+f0(i,j)
f2(i,j) = -f1(i,j)*zkk(i)
enddo
enddo
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,1), &
n2d,amat,2*n2d,zero,fun0,n2d)
CALL ZGEMM('n','n',n2d,2*n2d,n2d,one,psiper(1,1,1), &
n2d,amat(n2d+1,1),2*n2d,zero,fund0,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,1), &
n2d,f1,n2d,zero,funl0,n2d)
CALL ZGEMM('n','n',n2d,norb*npol,n2d,one,psiper(1,1,1), &
n2d,f2,n2d,zero,fundl0,n2d)
!---------
! scaling the integrals
CALL DSCAL(2*norbf*npol*2*n2d, sarea, intw1, 1)
CALL DSCAL(2*norbf*npol*norbf*npol, sarea, intw2, 1)
!
! To construct functions and derivetives on the boundaries
!
! local solutions
ALLOCATE( s5m( n2d, n2d ) )
ALLOCATE( s6m( n2d, n2d ) )
ALLOCATE( s7m( n2d, n2d ) )
ALLOCATE( s8m( n2d, n2d ) )
! fun0=(0.d0,0.d0)
! fun1=(0.d0,0.d0)
! fund0=(0.d0,0.d0)
! fund1=(0.d0,0.d0)
k = 1
kp = nrzp
do n=1, n2d
do lam=1, n2d
s1m(lam,n)=bf(lam,n)*ezk(lam,k)
s2m(lam,n)=al(lam,n)*ezk(lam,kp)
if (lam.eq.n) s2m(lam,n)=s2m(lam,n)+(1.d0,0.d0)
s3m(lam,n)=-cim*zk(lam,k)*s1m(lam,n)*tpiba
s4m(lam,n)=cim*zk(lam,kp)*s2m(lam,n)*tpiba
if (lam.eq.n) s4m(lam,n)=s4m(lam,n)-2.d0*cim*zk(lam,kp)*tpiba
s5m(lam,n)=bl(lam,n)*ezk(lam,k)
if (lam.eq.n) s5m(lam,n)=s5m(lam,n)+(1.d0,0.d0)
s6m(lam,n)=af(lam,n)*ezk(lam,kp)
s7m(lam,n)=-cim*zk(lam,k)*s5m(lam,n)*tpiba
if (lam.eq.n) s7m(lam,n)=s7m(lam,n)+2.d0*cim*zk(lam,k)*tpiba
s8m(lam,n)=cim*zk(lam,kp)*s6m(lam,n)*tpiba
enddo
enddo
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s1m,n2d,zero,fun0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s2m,n2d,zero,fun1,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s3m,n2d,zero,fund0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s4m,n2d,zero,fund1,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s5m,n2d,zero, &
fun0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s6m,n2d,zero, &
fun1(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s7m,n2d,zero, &
fund0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kp),n2d,s8m,n2d,zero, &
fund1(1,n2d+1),n2d)
! nonlocal solutions
funl0=(0.d0,0.d0)
funl1=(0.d0,0.d0)
fundl0=(0.d0,0.d0)
fundl1=(0.d0,0.d0)
do iorb = 1, norb*npol
do lam = 1, n2d
s1=ff(lam,iorb)+bnlf(lam,iorb)*ezk(lam,k)
s2=fl(lam,iorb)+anll(lam,iorb)*ezk(lam,kp)
s3=-cim*zk(lam,k)*s1*tpiba
s4=cim*zk(lam,kp)*s2*tpiba
do ig=1, n2d
funl0(ig,iorb)=funl0(ig,iorb)+psiper(ig,lam,k)*s1
funl1(ig,iorb)=funl1(ig,iorb)+psiper(ig,lam,kp)*s2
fundl0(ig,iorb)=fundl0(ig,iorb)+psiper(ig,lam,k)*s3
fundl1(ig,iorb)=fundl1(ig,iorb)+psiper(ig,lam,kp)*s4
enddo
enddo
enddo
!
! To construct the functions in the whole rigion zin<z<zfin in the
! case of multiparallel running
@ -491,39 +436,31 @@ subroutine scatter_forw(nrz, nrzp, z, psiper, zk, norb, tblm, cros, &
CALL rotproc(fun0, fund0, fun1, fund1, funl0, fundl0, funl1, &
fundl1, intw1, intw2, n2d, norbf, norb)
#endif
deallocate( psigper )
deallocate( w )
deallocate( w0 )
deallocate( cix )
deallocate( dix )
deallocate( ci )
deallocate( di )
deallocate( inslab )
deallocate( zkk )
deallocate( ezk )
deallocate( emzk )
deallocate( ezk1 )
deallocate( emzk1 )
deallocate( zk2 )
deallocate( amat )
deallocate( xmat )
deallocate( ipiv )
deallocate( f0 )
deallocate( f1 )
deallocate( f2 )
deallocate( f_aux )
CALL stop_clock('scatter_forw')
DEALLOCATE(ci)
DEALLOCATE(di)
DEALLOCATE(bf)
DEALLOCATE(an)
DEALLOCATE(bn)
DEALLOCATE(app)
DEALLOCATE(bpp)
DEALLOCATE(al)
DEALLOCATE(bl)
DEALLOCATE(af)
DEALLOCATE(bnlf)
DEALLOCATE(anln)
DEALLOCATE(bnln)
DEALLOCATE(anlp)
DEALLOCATE(bnlp)
DEALLOCATE(anll)
DEALLOCATE(ff)
DEALLOCATE(fl)
DEALLOCATE(s1m)
DEALLOCATE(s2m)
DEALLOCATE(s3m)
DEALLOCATE(s4m)
DEALLOCATE(s5m)
DEALLOCATE(s6m)
DEALLOCATE(s7m)
DEALLOCATE(s8m)
DEALLOCATE(ezk)
DEALLOCATE(emzk)
DEALLOCATE(ezk1)
DEALLOCATE(emzk1)
DEALLOCATE(zk2)
RETURN
return
END SUBROUTINE scatter_forw