Old parallel variables (such as me mypool) removed from PWCOND.

C.S.


git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@1423 c92efa57-630b-4861-b058-cf58834340f0
This commit is contained in:
sbraccia 2004-11-04 13:08:25 +00:00
parent 6a4140479e
commit b1f2db7398
4 changed files with 673 additions and 682 deletions

View File

@ -5,141 +5,136 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
subroutine do_cond(nodenumber)
#include "f_defs.h"
!
SUBROUTINE do_cond(nodenumber)
!
! This is the main driver of the pwcond.x program.
! It calculates the complex band structure, solves the
! scattering problem and calculates the transmission coefficient.
!
#include "f_defs.h"
USE ions_base, ONLY : nat, ityp, ntyp => nsp, tau
use pwcom
use cond
use io_files
use io_global, only : ionode_id
#ifdef __PARA
use para, only: me
use mp
#endif
implicit none
character(len=3) nodenumber
real(kind=DP), parameter :: eps=1.d-8
real(kind=DP) :: wtot
integer :: ik, ien, ios, orbin, orbfin
logical :: write0
USE pwcom
USE cond
USE io_files
USE io_global, ONLY : ionode, ionode_id
namelist /inputcond/ outdir, prefix, band_file, tran_file, fil_loc, &
USE mp
IMPLICIT NONE
CHARACTER(len=3) nodenumber
REAL(kind=DP), PARAMETER :: eps=1.d-8
REAL(kind=DP) :: wtot
INTEGER :: ik, ien, ios, orbin, orbfin
LOGICAL :: write0
NAMELIST /inputcond/ outdir, prefix, band_file, tran_file, fil_loc, &
lwrite_loc, lread_loc, ikind, iofspin, llocal, &
bdl1, bdl2, bdr1, bdr2, nz1, dnslab, energy0, &
denergy, ecut2d, ewind, epsproj, delgep,cutplot,&
llapack
CHARACTER (LEN=80) :: input_file
INTEGER :: nargs, iiarg, ierr, ilen
INTEGER :: nargs, iiarg, ierr, ILEN
INTEGER, EXTERNAL :: iargc
nd_nmbr=nodenumber
call init_clocks(.TRUE.)
call start_clock('PWCOND')
call start_clock('init')
CALL init_clocks(.TRUE.)
CALL start_clock('PWCOND')
CALL start_clock('init')
!
! set default values for variables in namelist
!
outdir = './'
prefix = ' '
band_file = ' '
tran_file = ' '
fil_loc = ' '
lwrite_loc = .false.
lread_loc = .false.
iofspin = 1
ikind = 0
bdl1 = 0.d0
bdl2 = 0.d0
bdr1 = 0.d0
bdr2 = 0.d0
nz1 = 11
dnslab = 0
energy0 = 0.d0
denergy = 0.d0
ecut2d = 0.d0
ewind = 0.d0
llocal = .false.
llapack = .false.
epsproj = 1.d-3
delgep = 0.d0
cutplot = 2.d0
outdir = './'
prefix = ' '
band_file = ' '
tran_file = ' '
fil_loc = ' '
lwrite_loc = .FALSE.
lread_loc = .FALSE.
iofspin = 1
ikind = 0
bdl1 = 0.d0
bdl2 = 0.d0
bdr1 = 0.d0
bdr2 = 0.d0
nz1 = 11
dnslab = 0
energy0 = 0.d0
denergy = 0.d0
ecut2d = 0.d0
ewind = 0.d0
llocal = .FALSE.
llapack = .FALSE.
epsproj = 1.d-3
delgep = 0.d0
cutplot = 2.d0
#ifdef __PARA
if (me == 1) then
#endif
!
! ... Input from file ?
!
nargs = iargc()
!
DO iiarg = 1, ( nargs - 1 )
IF ( ionode ) THEN
!
CALL getarg( iiarg, input_file )
IF ( TRIM( input_file ) == '-input' .OR. &
TRIM( input_file ) == '-inp' .OR. &
TRIM( input_file ) == '-in' ) THEN
!
CALL getarg( ( iiarg + 1 ) , input_file )
OPEN ( UNIT = 5, FILE = input_file, FORM = 'FORMATTED', &
STATUS = 'OLD', IOSTAT = ierr )
CALL errore( 'iosys', 'input file ' // TRIM( input_file ) // &
& ' not found' , ierr )
!
END IF
! ... Input from file ?
!
END DO
nargs = iargc()
!
DO iiarg = 1, ( nargs - 1 )
!
CALL getarg( iiarg, input_file )
IF ( TRIM( input_file ) == '-input' .OR. &
TRIM( input_file ) == '-inp' .OR. &
TRIM( input_file ) == '-in' ) THEN
!
CALL getarg( ( iiarg + 1 ) , input_file )
OPEN ( UNIT = 5, FILE = input_file, FORM = 'FORMATTED', &
STATUS = 'OLD', IOSTAT = ierr )
CALL errore( 'iosys', 'input file ' // TRIM( input_file ) // &
& ' not found' , ierr )
!
END IF
!
END DO
!
! reading the namelist inputpp
!
READ (5, inputcond, err=200, iostat=ios )
200 CALL errore ('do_cond','reading inputcond namelist',ABS(ios))
tmp_dir=TRIM(outdir)
!
! Reading 2D k-point
READ(5, *, err=300, iostat=ios ) nkpts
ALLOCATE( xyk(2,nkpts) )
ALLOCATE( wkpt(nkpts) )
wtot = 0.d0
DO ik = 1, nkpts
READ(5, *, err=300, iostat=ios) xyk(1,ik), xyk(2,ik), wkpt(ik)
wtot = wtot + wkpt(ik)
ENDDO
DO ik = 1, nkpts
wkpt(ik) = wkpt(ik)/wtot
ENDDO
300 CALL errore ('do_cond','2D k-point',ABS(ios))
!
! reading the namelist inputpp
!
read (5, inputcond, err=200, iostat=ios )
200 call errore ('do_cond','reading inputcond namelist',abs(ios))
tmp_dir=trim(outdir)
!
! Reading 2D k-point
read(5, *, err=300, iostat=ios ) nkpts
allocate( xyk(2,nkpts) )
allocate( wkpt(nkpts) )
wtot = 0.d0
do ik = 1, nkpts
read(5, *, err=300, iostat=ios) xyk(1,ik), xyk(2,ik), wkpt(ik)
wtot = wtot + wkpt(ik)
enddo
do ik = 1, nkpts
wkpt(ik) = wkpt(ik)/wtot
enddo
300 call errore ('do_cond','2D k-point',abs(ios))
!
! To form the array of energies for calculation
!
read(5, *, err=400, iostat=ios ) nenergy
allocate( earr(nenergy) )
allocate( tran_tot(nenergy) )
if(abs(denergy).le.1.d-8) then
! the list of energies is read
do ien = 1, nenergy
read(5, *, err=400, iostat=ios) earr(ien)
enddo
else
! the array of energies is automatically formed
do ien = 1, nenergy
earr(ien) = energy0 + (ien-1)*denergy
tran_tot(ien) = 0.d0
enddo
endif
400 call errore ('do_cond','reading energy list',abs(ios))
#ifdef __PARA
end if
!
! To form the array of energies for calculation
!
READ(5, *, err=400, iostat=ios ) nenergy
ALLOCATE( earr(nenergy) )
ALLOCATE( tran_tot(nenergy) )
IF(ABS(denergy).LE.1.d-8) THEN
! the list of energies is read
DO ien = 1, nenergy
READ(5, *, err=400, iostat=ios) earr(ien)
ENDDO
ELSE
! the array of energies is automatically formed
DO ien = 1, nenergy
earr(ien) = energy0 + (ien-1)*denergy
tran_tot(ien) = 0.d0
ENDDO
ENDIF
400 CALL errore ('do_cond','reading energy list',ABS(ios))
!
END IF
!
! ... Broadcast variables
@ -170,88 +165,87 @@ implicit none
CALL mp_bcast( llapack, ionode_id )
CALL mp_bcast( nkpts, ionode_id )
CALL mp_bcast( nenergy, ionode_id )
if (me .ne. 1) then
allocate( xyk(2,nkpts) )
allocate( wkpt(nkpts) )
allocate( earr(nenergy) )
allocate( tran_tot(nenergy) )
endif
IF ( .NOT. ionode ) THEN
ALLOCATE( xyk(2,nkpts) )
ALLOCATE( wkpt(nkpts) )
ALLOCATE( earr(nenergy) )
ALLOCATE( tran_tot(nenergy) )
ENDIF
CALL mp_bcast( xyk, ionode_id )
CALL mp_bcast( wkpt, ionode_id )
CALL mp_bcast( earr, ionode_id )
CALL mp_bcast( tran_tot, ionode_id )
#endif
!
! Now allocate space for pwscf variables, read and check them.
!
call read_file
call openfil
call struc_fact (nat,tau,ntyp,ityp,ngm,g,bg, &
CALL read_file
CALL openfil
CALL struc_fact (nat,tau,ntyp,ityp,ngm,g,bg, &
nr1,nr2,nr3,strf,eigts1,eigts2,eigts3)
call init_us_1
call newd
CALL init_us_1
CALL newd
!
! Allocation for pwcond variables
!
call allocate_cond
call init_cond
call stop_clock('init')
CALL allocate_cond
CALL init_cond
CALL stop_clock('init')
if (llocal) &
call local_set(nocrosl,noinsl,noinss,nocrosr,noinsr,norb,norbs)
call poten
IF (llocal) &
CALL local_set(nocrosl,noinsl,noinss,nocrosr,noinsr,norb,norbs)
CALL poten
do ik=1, nkpts
DO ik=1, nkpts
call init_gper(ik)
CALL init_gper(ik)
!
! The main loop
!
eryd = earr(1)/rytoev + ef
call local
do ien=1, nenergy
CALL local
DO ien=1, nenergy
eryd = earr(ien)/rytoev + ef
call form_zk(n2d, nrzp, zkr, zk, eryd, tpiba)
CALL form_zk(n2d, nrzp, zkr, zk, eryd, tpiba)
orbin=1
orbfin=orbin-1+2*nocrosl+noinsl
call scatter_forw(bdl1, bdl2, orbin, orbfin)
CALL scatter_forw(bdl1, bdl2, orbin, orbfin)
orbin=1
orbfin=orbin-1+2*nocrosl+noinsl
call compbs(0, bdl1, bdl2, nocrosl, &
CALL compbs(0, bdl1, bdl2, nocrosl, &
orbfin-orbin+1, orbin, nchanl, kvall, &
kfunl, kfundl, kintl, kcoefl)
if (ikind.eq.2) then
IF (ikind.EQ.2) THEN
orbin=2*nocrosl+noinsl+noinss+1
orbfin=orbin-1+2*nocrosr+noinsr
call scatter_forw(bdr1, bdr2, orbin, orbfin)
CALL scatter_forw(bdr1, bdr2, orbin, orbfin)
orbin=2*nocrosl+noinsl+noinss+1
orbfin=orbin-1+2*nocrosr+noinsr
call compbs(1, bdr1, bdr2, nocrosr, &
CALL compbs(1, bdr1, bdr2, nocrosr, &
orbfin-orbin+1, orbin, nchanr, kvalr, &
kfunr, kfundr, kintr, kcoefr)
endif
call summary_band(ik,ien)
if (ikind.ne.0) then
ENDIF
CALL summary_band(ik,ien)
IF (ikind.NE.0) THEN
orbin=nocrosl+noinsl+1
orbfin=orbin-1+nocrosl+noinss+nocrosr
call scatter_forw(bdl2, bdr1, orbin, orbfin)
call transmit(ik,ien)
endif
enddo
call free_mem
CALL scatter_forw(bdl2, bdr1, orbin, orbfin)
CALL transmit(ik,ien)
ENDIF
ENDDO
CALL free_mem
enddo
ENDDO
if(ikind.gt.0.and.tran_file.ne.' ') &
call summary_tran(tran_file,nenergy,earr,tran_tot)
IF(ikind.GT.0.AND.tran_file.NE.' ') &
CALL summary_tran(tran_file,nenergy,earr,tran_tot)
call print_clock_pwcond()
call stop_clock('PWCOND')
return
end subroutine do_cond
CALL print_clock_pwcond()
CALL stop_clock('PWCOND')
RETURN
END SUBROUTINE do_cond

View File

@ -1,4 +1,3 @@
!
! Copyright (C) 2003 A. Smogunov
! This file is distributed under the terms of the
@ -8,100 +7,97 @@
!
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
#include "f_defs.h"
!
subroutine local
SUBROUTINE local
!
! This subroutine computes 2D eigenfunctions and eigenvalues for
! the local potential in each slab and performs 2D reduction of
! the plane wave basis set.
!
#include "f_defs.h"
USE io_global, ONLY : stdout
USE io_global, ONLY : stdout, ionode
USE pwcom
USE noncollin_module, ONLY : npol
USE io_files
USE cond
#ifdef __PARA
USE mp_global, ONLY: nproc
use para
USE mp_global, ONLY : nproc, me_pool, root_pool
USE parallel_include
#endif
implicit none
IMPLICIT NONE
integer :: i, il, j, jl, ixy, ig, jg, ipol, igper, k, &
INTEGER :: i, il, j, jl, ixy, ig, jg, ipol, igper, k, &
ios, index, number, nprob, nteam, nteamnow, &
status, info, kin, kfin, is, js
integer, allocatable :: fftxy(:,:)
real(kind=DP), parameter :: eps=1.d-6
real(kind=DP), allocatable :: el(:), gp(:)
complex(kind=DP), allocatable :: amat(:,:), amat1(:,:), ymat(:,:), &
INTEGER, ALLOCATABLE :: fftxy(:,:)
REAL(kind=DP), PARAMETER :: eps=1.d-6
REAL(kind=DP), ALLOCATABLE :: el(:), gp(:)
COMPLEX(kind=DP), ALLOCATABLE :: amat(:,:), amat1(:,:), ymat(:,:), &
psibase(:,:), psiprob(:,:)
complex(kind=DP),parameter :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
complex(kind=DP) :: aij, xfact, ZDOTC
logical :: exst
COMPLEX(kind=DP),PARAMETER :: one=(1.d0,0.d0), zero=(0.d0,0.d0)
COMPLEX(kind=DP) :: aij, xfact, ZDOTC
LOGICAL :: exst
!
! To divide the slabs between CPU
!
call start_clock('local')
call slabcpu(nrz, nrzp, nkofz, bdl1, bdl2, bdr1, bdr2, z)
CALL start_clock('local')
CALL slabcpu(nrz, nrzp, nkofz, bdl1, bdl2, bdr1, bdr2, z)
!
! If all the information is already contained in the file it reads it.
!
if (lread_loc) then
call seqopn(4,fil_loc,'unformatted',exst)
if(.not.exst) call errore ('local','fil_loc not found',1)
read(4) n2d
IF (lread_loc) THEN
CALL seqopn(4,fil_loc,'unformatted',exst)
IF(.NOT.exst) CALL errore ('local','fil_loc not found',1)
READ(4) n2d
! Allocate variables depending on n2d
call allocate_cond_2
read(4) ((newbg(ig,il), ig=1, ngper*npol), il=1, n2d)
CALL allocate_cond_2
READ(4) ((newbg(ig,il), ig=1, ngper*npol), il=1, n2d)
! WRITE( stdout,*) 'ngper, n2d = ', ngper, n2d
read(4) (((psiper(ig,il,k),ig=1,n2d),il=1,n2d), &
READ(4) (((psiper(ig,il,k),ig=1,n2d),il=1,n2d), &
k=1,nrzp)
read(4) ((zkr(il,k),il=1,n2d),k=1,nrzp)
READ(4) ((zkr(il,k),il=1,n2d),k=1,nrzp)
close(unit=4)
return
endif
CLOSE(unit=4)
RETURN
ENDIF
allocate( gp( 2 ) )
allocate( el( ngper * npol ) )
allocate( amat( ngper * npol, ngper * npol ) )
allocate( psibase( ngper * npol, ngper * npol ) )
allocate( psiprob( ngper * npol, ngper * npol ) )
allocate( fftxy(-(nrx-1)/2:nrx/2,-(nry-1)/2:nry/2) )
ALLOCATE( gp( 2 ) )
ALLOCATE( el( ngper * npol ) )
ALLOCATE( amat( ngper * npol, ngper * npol ) )
ALLOCATE( psibase( ngper * npol, ngper * npol ) )
ALLOCATE( psiprob( ngper * npol, ngper * npol ) )
ALLOCATE( fftxy(-(nrx-1)/2:nrx/2,-(nry-1)/2:nry/2) )
!
! To form fftxy correspondence
!
do i=1, nrx
DO i=1, nrx
il=i-1
if (il.gt.nrx/2) il=il-nrx
do j=1, nry
IF (il.GT.nrx/2) il=il-nrx
DO j=1, nry
jl=j-1
if (jl.gt.nry/2) jl=jl-nry
IF (jl.GT.nry/2) jl=jl-nry
fftxy(il,jl)=i+(j-1)*nrx
enddo
enddo
ENDDO
ENDDO
!
! to find kin and kfin
!
do k=1, nrz
if (z(k).le.bdl1+eps) kin=k
if (z(k).le.bdr2-eps) kfin=k
enddo
DO k=1, nrz
IF (z(k).LE.bdl1+eps) kin=k
IF (z(k).LE.bdr2-eps) kfin=k
ENDDO
!
! Starting k and number of CPU
!
nteam=1
#ifdef __PARA
kin=kin+me-1
nteam=nproc
#endif
kin = kin + me_pool
nteam = nproc
!
! set and solve the eigenvalue equation for each slab
@ -110,31 +106,31 @@ subroutine local
nprob=0
psibase=(0.d0,0.d0)
do while(kin.le.kfin)
DO WHILE(kin.LE.kfin)
amat=(0.d0,0.d0)
do ig=1, ngper
do jg=ig, ngper
do ipol=1, 2
DO ig=1, ngper
DO jg=ig, ngper
DO ipol=1, 2
gp(ipol)=gper(ipol,ig)-gper(ipol,jg)
enddo
ENDDO
index=number(gp, at, fftxy, nrx, nry)
if (index.gt.0) then
do is=1,npol
do js=1,npol
IF (index.GT.0) THEN
DO is=1,npol
DO js=1,npol
amat(ig+(is-1)*ngper,jg+(js-1)*ngper)=vppot(kin,index,is,js)
amat(jg+(js-1)*ngper,ig+(is-1)*ngper)= &
DCONJG(amat(ig+(is-1)*ngper,jg+(js-1)*ngper))
enddo
enddo
endif
enddo
do is=1,npol
ENDDO
ENDDO
ENDIF
ENDDO
DO is=1,npol
amat(ig+(is-1)*ngper,ig+(is-1)*ngper)= &
amat(ig+(is-1)*ngper,ig+(is-1)*ngper)+ &
(gper(1,ig)**2 + gper(2,ig)**2)*tpiba2
enddo
enddo
call hev_ab(ngper*npol, amat, ngper*npol, el, psiprob, &
ENDDO
ENDDO
CALL hev_ab(ngper*npol, amat, ngper*npol, el, psiprob, &
-1.d1, eryd+ewind, nprob)
! do is=1,ngper*npol
@ -147,173 +143,173 @@ subroutine local
! stop
#ifdef __PARA
if ( me.ne.1 ) then
call mpi_send(nprob,1,MPI_INTEGER,0,17, &
IF ( me_pool == root_pool ) THEN
CALL mpi_send(nprob,1,MPI_INTEGER,0,17, &
MPI_COMM_WORLD,info )
call errore ('n2d reduction','info<>0 in send',info)
call mpi_send(psiprob,2*ngper*npol*ngper*npol,MPI_REAL8,0,18, &
CALL errore ('n2d reduction','info<>0 in send',info)
CALL mpi_send(psiprob,2*ngper*npol*ngper*npol,MPI_REAL8,0,18, &
MPI_COMM_WORLD,info )
call errore ('n2d reduction','info<>0 in send',info)
else
call gramsh(ngper*npol,nprob,1,nprob, &
CALL errore ('n2d reduction','info<>0 in send',info)
ELSE
CALL gramsh(ngper*npol,nprob,1,nprob, &
psibase,psiprob,n2d,epsproj)
nteamnow=kfin-kin+1
if(nteamnow.gt.nteam) nteamnow=nteam
IF(nteamnow.GT.nteam) nteamnow=nteam
do ig=1, nteamnow-1
call mpi_recv(nprob,1,MPI_INTEGER, &
DO ig=1, nteamnow-1
CALL mpi_recv(nprob,1,MPI_INTEGER, &
ig,17,MPI_COMM_WORLD,status,info )
call errore ('n2d reduction','info<>0 in recv',info)
call mpi_recv(psiprob,2*ngper*npol*ngper*npol,MPI_REAL8, &
CALL errore ('n2d reduction','info<>0 in recv',info)
CALL mpi_recv(psiprob,2*ngper*npol*ngper*npol,MPI_REAL8, &
ig,18,MPI_COMM_WORLD,status,info )
call errore ('n2d reduction','info<>0 in recv',info)
call gramsh(ngper*npol,nprob,1,nprob, &
CALL errore ('n2d reduction','info<>0 in recv',info)
CALL gramsh(ngper*npol,nprob,1,nprob, &
psibase,psiprob,n2d,epsproj)
enddo
endif
ENDDO
ENDIF
#else
call gramsh(ngper*npol,nprob,1,nprob,psibase,psiprob,n2d,epsproj)
CALL gramsh(ngper*npol,nprob,1,nprob,psibase,psiprob,n2d,epsproj)
#endif
kin=kin+nteam
enddo
ENDDO
#ifdef __PARA
call mpi_barrier( MPI_COMM_WORLD, info )
call mpi_bcast(n2d,1,MPI_INTEGER,0,MPI_COMM_WORLD,info)
call errore ('reduction','mpi_bcast 1',info)
call mpi_bcast(psibase,2*ngper*npol*ngper*npol,MPI_REAL8,0, &
CALL mpi_barrier( )
CALL mpi_bcast(n2d,1,MPI_INTEGER,0,MPI_COMM_WORLD,info)
CALL errore ('reduction','mpi_bcast 1',info)
CALL mpi_bcast(psibase,2*ngper*npol*ngper*npol,MPI_REAL8,0, &
MPI_COMM_WORLD,info)
call errore ('reduction','mpi_bcast 1',info)
CALL errore ('reduction','mpi_bcast 1',info)
#endif
!
! Allocate variables depending on n2d
!
call allocate_cond_2
if (npol.eq.2) then
CALL allocate_cond_2
IF (npol.EQ.2) THEN
WRITE( stdout,*) 'ngper, ngper*npol, n2d = ', ngper, ngper*npol, n2d
else
ELSE
WRITE( stdout,*) 'ngper, n2d = ', ngper, n2d
endif
ENDIF
!
! Construct components of basis vector set on G_per
!
call DCOPY(2*ngper*npol*n2d,psibase,1,newbg,1)
CALL DCOPY(2*ngper*npol*n2d,psibase,1,newbg,1)
!
! set and solve the eigenvalue equation for each slab
!
allocate( amat1( n2d, n2d ) )
allocate( ymat( ngper*npol, n2d ) )
ALLOCATE( amat1( n2d, n2d ) )
ALLOCATE( ymat( ngper*npol, n2d ) )
! for reduced basis set H'_{ab}=e*^i_aH_{ij}e^j_b
do k=1, nrz
if(nkofz(k).ne.0) then
DO k=1, nrz
IF(nkofz(k).NE.0) THEN
ymat=(0.d0,0.d0)
!
! First compute y_{ib}=H_{ij}e_{jb}
!
do ig=1, ngper
do jg=1, ngper
do ipol=1, 2
DO ig=1, ngper
DO jg=1, ngper
DO ipol=1, 2
gp(ipol) = gper(ipol,ig) - gper(ipol,jg)
enddo
ENDDO
index=number(gp, at, fftxy, nrx, nry)
do is=1,npol
do js=1,npol
if (index.gt.0) then
DO is=1,npol
DO js=1,npol
IF (index.GT.0) THEN
aij=vppot(k,index,is,js)
else
ELSE
aij=(0.d0,0.d0)
endif
if ((ig.eq.jg).and.(is.eq.js)) &
ENDIF
IF ((ig.EQ.jg).AND.(is.EQ.js)) &
aij=aij+(gper(1,ig)**2+ &
gper(2,ig)**2)*tpiba2
amat(ig+(is-1)*ngper,jg+(js-1)*ngper)= aij
enddo
enddo
enddo
enddo
call ZGEMM('n','n',ngper*npol,n2d,ngper*npol,one,amat,ngper*npol, &
ENDDO
ENDDO
ENDDO
ENDDO
CALL ZGEMM('n','n',ngper*npol,n2d,ngper*npol,one,amat,ngper*npol, &
newbg,ngper*npol,zero,ymat,ngper*npol)
!
! and construct H'_{ab}=<e_a|y_b>
!
do il=1, n2d
do jl=il, n2d
DO il=1, n2d
DO jl=il, n2d
amat1(il,jl)=ZDOTC(ngper*npol,newbg(1,il),1,ymat(1,jl),1)
amat1(jl,il)=conjg(amat1(il,jl))
enddo
enddo
amat1(jl,il)=CONJG(amat1(il,jl))
ENDDO
ENDDO
!
! Solving the eigenvalue problem and construction zk
!
info=-1
call hev_ab(n2d, amat1, n2d, zkr(1,nkofz(k)), &
CALL hev_ab(n2d, amat1, n2d, zkr(1,nkofz(k)), &
psiper(1,1,nkofz(k)), 0.d0, 0.d0, info)
endif
enddo
ENDIF
ENDDO
#ifdef __PARA
call mpi_barrier( MPI_COMM_WORLD, info )
CALL mpi_barrier()
#endif
!
! saving the 2D data on the file if lwrite_loc=.t.
!
if (lwrite_loc) then
if(fil_loc.eq.' ') call errore ('local','fil_loc no name',1)
call seqopn(4,fil_loc,'unformatted',exst)
write(4) n2d
write(4) ((newbg(ig,il), ig=1, ngper*npol), il=1, n2d)
IF (lwrite_loc) THEN
IF(fil_loc.EQ.' ') CALL errore ('local','fil_loc no name',1)
CALL seqopn(4,fil_loc,'unformatted',exst)
WRITE(4) n2d
WRITE(4) ((newbg(ig,il), ig=1, ngper*npol), il=1, n2d)
write(4) (((psiper(ig,il,k),ig=1,n2d),il=1,n2d), &
WRITE(4) (((psiper(ig,il,k),ig=1,n2d),il=1,n2d), &
k=1,nrzp)
write(4) ((zkr(il,k),il=1,n2d),k=1,nrzp)
close(unit=4)
endif
WRITE(4) ((zkr(il,k),il=1,n2d),k=1,nrzp)
CLOSE(unit=4)
ENDIF
deallocate(amat)
deallocate(amat1)
deallocate(ymat)
deallocate(gp)
deallocate(psibase)
deallocate(psiprob)
deallocate(el)
deallocate(fftxy)
DEALLOCATE(amat)
DEALLOCATE(amat1)
DEALLOCATE(ymat)
DEALLOCATE(gp)
DEALLOCATE(psibase)
DEALLOCATE(psiprob)
DEALLOCATE(el)
DEALLOCATE(fftxy)
call stop_clock('local')
return
end subroutine local
CALL stop_clock('local')
RETURN
END SUBROUTINE local
!-----------------------------------
function number(gp, at, fftxy, nrx, nry)
FUNCTION number(gp, at, fftxy, nrx, nry)
!
! This function receives as input the coordinates of 2D g vector
! and write on output its fft position.
!
implicit none
integer :: nrx, nry, fftxy(-(nrx-1)/2:nrx/2, -(nry-1)/2:nry/2), &
IMPLICIT NONE
INTEGER :: nrx, nry, fftxy(-(nrx-1)/2:nrx/2, -(nry-1)/2:nry/2), &
number, n1, n2
real(kind=kind(0.d0)) :: gp(2), at(3,3), x1, x2
real(kind=kind(0.d0)), parameter :: eps=1.d-4
REAL(kind=KIND(0.d0)) :: gp(2), at(3,3), x1, x2
REAL(kind=KIND(0.d0)), PARAMETER :: eps=1.d-4
x1=gp(1)*at(1,1)+gp(2)*at(2,1)
x2=gp(1)*at(1,2)+gp(2)*at(2,2)
n1=nint(x1)
n2=nint(x2)
if (n1.le.nrx/2.and.n1.ge.-(nrx-1)/2.and. &
n2.le.nry/2.and.n2.ge.-(nry-1)/2) then
n1=NINT(x1)
n2=NINT(x2)
IF (n1.LE.nrx/2.AND.n1.GE.-(nrx-1)/2.AND. &
n2.LE.nry/2.AND.n2.GE.-(nry-1)/2) THEN
number=fftxy(n1,n2)
else
ELSE
!
! The g vector is not inside the 2D mesh
!
number=-1
endif
ENDIF
return
end function number
RETURN
END FUNCTION number

View File

@ -8,131 +8,131 @@
!
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
#include "f_defs.h"
!
subroutine poten
SUBROUTINE poten
!
! This subroutine computes the 2D Fourier components of the
! local potential in each slab.
!
#include "f_defs.h"
use pwcom
use noncollin_module, ONLY : noncolin, npol
use cond
#ifdef __PARA
use para
#endif
implicit none
USE pwcom
USE noncollin_module, ONLY : noncolin, npol
USE cond
USE mp_global, ONLY : me_pool
USE pfft, ONLY : npp
integer :: &
IMPLICIT NONE
INTEGER :: &
i, j, ij, ijx, k, n, p, il, ik, kstart, klast, &
ix, jx, kx, ir, ir1, ixy, info
integer :: iis, jjs, is(4), js(4), ispin, nspin_eff
integer :: ionode_id
integer, allocatable :: ipiv(:)
INTEGER :: iis, jjs, is(4), js(4), ispin, nspin_eff
INTEGER :: ionode_id
INTEGER, ALLOCATABLE :: ipiv(:)
real(kind=DP), parameter :: eps = 1.d-8
real(kind=DP) :: arg, bet
real(kind=DP), allocatable :: gz(:), auxr(:)
REAL(kind=DP), PARAMETER :: eps = 1.d-8
REAL(kind=DP) :: arg, bet
REAL(kind=DP), ALLOCATABLE :: gz(:), auxr(:)
complex(kind=DP), parameter :: cim = (0.d0,1.d0)
complex(kind=DP) :: caux
complex(kind=DP), allocatable :: aux(:), amat(:,:), amat0(:,:)
complex(kind=DP), allocatable :: vppot0(:,:,:,:)
COMPLEX(kind=DP), PARAMETER :: cim = (0.d0,1.d0)
COMPLEX(kind=DP) :: caux
COMPLEX(kind=DP), ALLOCATABLE :: aux(:), amat(:,:), amat0(:,:)
COMPLEX(kind=DP), ALLOCATABLE :: vppot0(:,:,:,:)
logical :: lg
LOGICAL :: lg
call start_clock('poten')
allocate( ipiv( nrz ) )
allocate( gz( nrz ) )
allocate( aux( nrx1*nrx2*nrx3 ) )
allocate( auxr( nrxx ) )
allocate( amat( nrz, nrz ) )
allocate( amat0( nrz, nrz ) )
CALL start_clock('poten')
ALLOCATE( ipiv( nrz ) )
ALLOCATE( gz( nrz ) )
ALLOCATE( aux( nrx1*nrx2*nrx3 ) )
ALLOCATE( auxr( nrxx ) )
ALLOCATE( amat( nrz, nrz ) )
ALLOCATE( amat0( nrz, nrz ) )
!
! Compute the Gz vectors in the z direction
!
do k = 1, nrz
DO k = 1, nrz
il = k-1
if (il.gt.nrz/2) il = il-nrz
IF (il.GT.nrz/2) il = il-nrz
gz(k) = il*bg(3,3)
enddo
ENDDO
!
! set up the matrix for the linear system
!
do n=1,nrz
do p=1,nrz
DO n=1,nrz
DO p=1,nrz
arg=gz(n)*z(p)*tpi
bet=gz(n)*(z(p+1)-z(p))*tpi
if (abs(gz(n)).gt.eps) then
caux=cim*(CMPLX(cos(bet),-sin(bet))-(1.d0,0.d0)) &
IF (ABS(gz(n)).GT.eps) THEN
caux=cim*(CMPLX(COS(bet),-SIN(bet))-(1.d0,0.d0)) &
/zl/gz(n)/tpi
else
ELSE
caux=(z(p+1)-z(p))/zl
endif
amat0(n,p)=CMPLX(cos(arg),-sin(arg))*caux
enddo
enddo
if (noncolin) then
ENDIF
amat0(n,p)=CMPLX(COS(arg),-SIN(arg))*caux
ENDDO
ENDDO
IF (noncolin) THEN
nspin_eff=4
ij=0
do iis=1,2
do jjs=1,2
DO iis=1,2
DO jjs=1,2
ij=ij+1
is(ij)=iis
js(ij)=jjs
enddo
enddo
else
ENDDO
ENDDO
ELSE
nspin_eff=1
is(1)=1
js(1)=1
endif
ENDIF
!
! To form local potential on the real space mesh
!
vppot = 0.d0
do ispin=1,nspin_eff
if (noncolin) then
if (ispin==1) then
DO ispin=1,nspin_eff
IF (noncolin) THEN
IF (ispin==1) THEN
auxr(:) = vltot(:)+vr(:,1)
else
ELSE
auxr(:) = vr(:,ispin)
endif
else
ENDIF
ELSE
auxr(:) = vltot(:) + vr(:,iofspin)
endif
ENDIF
!
! To collect the potential from different CPUs
!
aux(:) = (0.d0,0.d0)
#ifdef __PARA
kstart = 1
do i=1, me-1
DO i=1, me_pool
kstart = kstart+npp(i)
enddo
klast = kstart+npp(me)-1
ENDDO
klast = kstart+npp(me_pool+1)-1
#endif
do i = 1, nrx1*nrx2
do k = 1, nr3
lg = .true.
DO i = 1, nrx1*nrx2
DO k = 1, nr3
lg = .TRUE.
ir = i+(k-1)*nrx2*nrx1
ir1 = ir
#ifdef __PARA
if(k.ge.kstart.and.k.le.klast) then
lg = .true.
IF(k.GE.kstart.AND.k.LE.klast) THEN
lg = .TRUE.
ir1 = i+(k-kstart)*nrx2*nrx1
else
lg = .false.
endif
ELSE
lg = .FALSE.
ENDIF
#endif
if (lg) then
IF (lg) THEN
aux(ir) = auxr(ir1)
endif
enddo
enddo
ENDIF
ENDDO
ENDDO
#ifdef __PARA
call reduce (2*nrx1*nrx2*nrx3,aux)
CALL reduce (2*nrx1*nrx2*nrx3,aux)
#endif
@ -143,74 +143,74 @@ do ispin=1,nspin_eff
!
! This FFT is needed to make a non-parallel FFT in the parallel case
!
call cft3sp(aux,nr1,nr2,nr3,nrx1,nrx2,nrx3,-1)
CALL cft3sp(aux,nr1,nr2,nr3,nrx1,nrx2,nrx3,-1)
#else
call cft3(aux,nr1,nr2,nr3,nrx1,nrx2,nrx3,-1)
CALL cft3(aux,nr1,nr2,nr3,nrx1,nrx2,nrx3,-1)
#endif
do i = 1, nrx
if(i.gt.nrx/2+1) then
DO i = 1, nrx
IF(i.GT.nrx/2+1) THEN
ix = nr1-(nrx-i)
else
ELSE
ix = i
endif
do j = 1, nry
if(j.gt.nry/2+1) then
ENDIF
DO j = 1, nry
IF(j.GT.nry/2+1) THEN
jx = nr2-(nry-j)
else
ELSE
jx = j
endif
ENDIF
ij = i+(j-1)*nrx
ijx = ix+(jx-1)*nrx1
do k = 1, nrz
DO k = 1, nrz
il = k-1
if (il.gt.nrz/2) il = il-nrz
if(il.le.nr3/2.and.il.ge.-(nr3-1)/2) then
IF (il.GT.nrz/2) il = il-nrz
IF(il.LE.nr3/2.AND.il.GE.-(nr3-1)/2) THEN
if(k.gt.nrz/2+1) then
IF(k.GT.nrz/2+1) THEN
kx = nr3-(nrz-k)
else
ELSE
kx = k
endif
ENDIF
vppot(k, ij, is(ispin), js(ispin)) = aux(ijx+(kx-1)*nrx1*nrx2)
endif
enddo
enddo
enddo
ENDIF
ENDDO
ENDDO
ENDDO
!
! solve the linear system
!
amat=amat0
call ZGESV(nrz, nrx*nry, amat, nrz, ipiv, vppot(1,1,is(ispin),js(ispin)),&
CALL ZGESV(nrz, nrx*nry, amat, nrz, ipiv, vppot(1,1,is(ispin),js(ispin)),&
nrz, info)
call errore ('poten','info different from zero',abs(info))
enddo
CALL errore ('poten','info different from zero',ABS(info))
ENDDO
if (noncolin) then
allocate( vppot0(nrz, nrx * nry, npol, npol) )
IF (noncolin) THEN
ALLOCATE( vppot0(nrz, nrx * nry, npol, npol) )
vppot0=vppot
vppot(:,:,1,1)=vppot0(:,:,1,1)+vppot0(:,:,2,2)
vppot(:,:,1,2)=vppot0(:,:,1,2)-(0.d0,1.d0)*vppot0(:,:,2,1)
vppot(:,:,2,1)=vppot0(:,:,1,2)+(0.d0,1.d0)*vppot0(:,:,2,1)
vppot(:,:,2,2)=vppot0(:,:,1,1)-vppot0(:,:,2,2)
deallocate( vppot0 )
endif
DEALLOCATE( vppot0 )
ENDIF
! do p = 1, nrz
! write(6,'(i5,2f12.6)') p, real(vppot(p,105,2,2)), imag(vppot(p,105,2,2))
! enddo
! stop
deallocate(ipiv)
deallocate(gz)
deallocate(aux)
deallocate(auxr)
deallocate(amat)
deallocate(amat0)
DEALLOCATE(ipiv)
DEALLOCATE(gz)
DEALLOCATE(aux)
DEALLOCATE(auxr)
DEALLOCATE(amat)
DEALLOCATE(amat0)
call stop_clock('poten')
CALL stop_clock('poten')
return
end subroutine poten
RETURN
END SUBROUTINE poten

View File

@ -9,8 +9,9 @@
! Optimized Aug. 2004 (ADC)
! Generalized to spinor wavefunctions and spin-orbit Oct. 2004 (ADC).
!
#include "f_defs.h"
!
subroutine scatter_forw(zin, zfin, orbin, orbfin)
SUBROUTINE scatter_forw(zin, zfin, orbin, orbfin)
!
! This subroutine computes local Phi_n and partial nonlocal Phi_alpha
! solutions of the Schrodinger equation in the region zin<z<zfin
@ -19,14 +20,14 @@ subroutine scatter_forw(zin, zfin, orbin, orbfin)
! It computes also the integrals (intw1, intw2) of Phi_n and
! Phi_alpha over beta-functions inside the unit cell.
!
#include "f_defs.h"
use pwcom
use noncollin_module, ONLY : npol
use para
use cond
implicit none
integer :: &
USE pwcom
USE noncollin_module, ONLY : npol
USE cond
!
IMPLICIT NONE
INTEGER :: &
orbin, & ! starting orbital in zin<z<zfin
orbfin, & ! final orbital in zin<z<zfin
norbnow, & ! total number of orbitals in zin<z<zfin
@ -36,15 +37,15 @@ implicit none
lastk, & ! last slab for a given CPU
k, kz, n, lam, ig, lam1, mdim, itt, nbb, iorb, iorb1, &
iorbs, iorb1s, iorba, iorb1a, is, kkz, nok
integer :: info
integer, allocatable :: inslab(:)
real(kind=DP), parameter :: eps=1.d-8
real(kind=DP) :: DDOT, zin, zfin, dz, tr, tr1, dz1
complex(kind=DP), parameter :: cim=(0.d0,1.d0), one=(1.d0, 0.d0), &
INTEGER :: info
INTEGER, ALLOCATABLE :: inslab(:)
REAL(kind=DP), PARAMETER :: eps=1.d-8
REAL(kind=DP) :: DDOT, zin, zfin, dz, tr, tr1, dz1
COMPLEX(kind=DP), PARAMETER :: cim=(0.d0,1.d0), one=(1.d0, 0.d0), &
zero=(0.d0,0.d0)
complex(kind=DP) :: int1d, int2d, c, d, e, f, s1, s2, s3, s4, arg,&
COMPLEX(kind=DP) :: int1d, int2d, c, d, e, f, s1, s2, s3, s4, arg,&
f1p, ZDOTC, fact, factm
complex(kind=DP), allocatable :: &
COMPLEX(kind=DP), ALLOCATABLE :: &
psigper(:,:), & ! psigper(g,lam)=newbg(g,lam1) psiper(lam1,lam)
w0(:,:,:), & ! w0(z,g,m) are 2D Fourier components (see four.f)
w(:,:,:), & ! w(z,lam,m)=psigper(g,lam)^* \exp{-igr^m_perp}
@ -64,22 +65,22 @@ implicit none
s3m(:,:), s4m(:,:), s5m(:,:), s6m(:,:), s7m(:,:), s8m(:,:), &
ezk1(:,:), emzk1(:,:)
call start_clock('scatter_forw')
CALL start_clock('scatter_forw')
norbnow = orbfin - orbin + 1
orbin = orbin - 1
!
! Find first and last slab (kin, kfin)
!
do k=1, nrz
if (z(k).le.zin+eps) kin=k
if (z(k).le.zfin-eps) kfin=k
enddo
DO k=1, nrz
IF (z(k).LE.zin+eps) kin=k
IF (z(k).LE.zfin-eps) kfin=k
ENDDO
dz=z(kin+1)-z(kin)
dz1=dz/nz1
!
! Divide the slabs among CPU
!
call divide(kfin-kin+1,startk,lastk)
CALL divide(kfin-kin+1,startk,lastk)
startk=kin+startk-1
lastk=kin+lastk-1
@ -87,181 +88,181 @@ implicit none
! Start of 2D Fourier components calculations and depending
! variables
!
allocate( psigper( ngper*npol, n2d ) )
allocate( w( nz1, n2d, norbnow*npol ) )
allocate( w0( nz1, ngper, 5 ) )
allocate( cix( nz1, n2d, norbnow*npol ) )
allocate( dix( nz1, n2d, norbnow*npol ) )
allocate( ci( norbnow*npol, n2d, nrzp ) )
allocate( di( norbnow*npol, n2d, nrzp ) )
allocate( inslab( norbnow ) )
allocate( ezk( n2d, nrzp ) )
allocate( emzk( n2d, nrzp ) )
allocate( ezk1( nz1, n2d ) )
allocate( emzk1( nz1, n2d ) )
allocate( zk2( n2d, nrzp ) )
ALLOCATE( psigper( ngper*npol, n2d ) )
ALLOCATE( w( nz1, n2d, norbnow*npol ) )
ALLOCATE( w0( nz1, ngper, 5 ) )
ALLOCATE( cix( nz1, n2d, norbnow*npol ) )
ALLOCATE( dix( nz1, n2d, norbnow*npol ) )
ALLOCATE( ci( norbnow*npol, n2d, nrzp ) )
ALLOCATE( di( norbnow*npol, n2d, nrzp ) )
ALLOCATE( inslab( norbnow ) )
ALLOCATE( ezk( n2d, nrzp ) )
ALLOCATE( emzk( n2d, nrzp ) )
ALLOCATE( ezk1( nz1, n2d ) )
ALLOCATE( emzk1( nz1, n2d ) )
ALLOCATE( zk2( n2d, nrzp ) )
intw1=(0.d0,0.d0)
intw2=(0.d0,0.d0)
do k=startk,lastk
DO k=startk,lastk
kz=nkofz(k)
do lam=1,n2d
DO lam=1,n2d
arg=cim*tpi*zk(lam, kz)*dz
ezk(lam,kz)=exp(arg)
emzk(lam,kz)=exp(-arg)
ezk(lam,kz)=EXP(arg)
emzk(lam,kz)=EXP(-arg)
arg=cim*tpi*zk(lam, kz)*dz1
zk2(lam,kz)=cim/(2.d0*zk(lam,kz)*tpiba)
enddo
enddo
ENDDO
ENDDO
!
! some orbitals relations
!
do iorb=1, norbnow
DO iorb=1, norbnow
inslab(iorb)=0
enddo
do iorb=1, norbnow
ENDDO
DO iorb=1, norbnow
iorbs=orbin+iorb
if(inslab(iorb).eq.0.and.mnew(iorbs).eq.1) then
IF(inslab(iorb).EQ.0.AND.mnew(iorbs).EQ.1) THEN
itt=itnew(iorbs)
nbb=nbnew(iorbs)
do iorb1=iorb, norbnow
DO iorb1=iorb, norbnow
iorb1s=orbin+iorb1
if(mnew(iorb1s).eq.1.and.nbnew(iorb1s).eq.nbb) then
tr=abs(taunew(3,iorbs)-taunew(3,iorb1s))
if(itnew(iorb1s).eq.itt.and.tr.le.eps) then
IF(mnew(iorb1s).EQ.1.AND.nbnew(iorb1s).EQ.nbb) THEN
tr=ABS(taunew(3,iorbs)-taunew(3,iorb1s))
IF(itnew(iorb1s).EQ.itt.AND.tr.LE.eps) THEN
inslab(iorb1)=iorb
endif
endif
enddo
endif
enddo
ENDIF
ENDIF
ENDDO
ENDIF
ENDDO
call start_clock('integrals')
CALL start_clock('integrals')
!
! The loop over slabs to compute ci, di, and initial intw2
!
do k=startk, lastk
DO k=startk, lastk
! write(6,*) 'integrals k=', k
kkz=nkofz(k)
do lam=1,n2d
DO lam=1,n2d
arg=cim*zk(lam,kkz)*dz1*tpi
fact=exp(arg)
factm=exp(-arg)
fact=EXP(arg)
factm=EXP(-arg)
ezk1(1,lam)=fact
emzk1(1,lam)=factm
do k1=2,nz1
DO k1=2,nz1
ezk1(k1,lam)=ezk1(k1-1,lam)*fact
emzk1(k1,lam)=emzk1(k1-1,lam)*factm
enddo
enddo
ENDDO
ENDDO
call ZGEMM('n', 'n', ngper*npol, n2d, n2d, one, newbg, ngper*npol, &
CALL ZGEMM('n', 'n', ngper*npol, n2d, n2d, one, newbg, ngper*npol, &
psiper(1,1,kkz), n2d, zero, psigper, ngper*npol)
w=(0.d0,0.d0)
do iorb=1, norbnow
DO iorb=1, norbnow
iorbs=orbin+iorb
if(cross(iorbs,k).eq.1.and.inslab(iorb).eq.iorb) then
IF(cross(iorbs,k).EQ.1.AND.inslab(iorb).EQ.iorb) THEN
mdim=2*ls(iorbs)+1
call four(iorbs, w0, k, dz)
do iorb1=1, norbnow
CALL four(iorbs, w0, k, dz)
DO iorb1=1, norbnow
iorb1s=orbin+iorb1
if(inslab(iorb1).eq.iorb) then
do ig=1, ngper
IF(inslab(iorb1).EQ.iorb) THEN
DO ig=1, ngper
tr=-tpi*DDOT(2,gper(1,ig),1,taunew(1,iorb1s),1)
c=cmplx(cos(tr),sin(tr))
do lam=1, n2d
do is=1,npol
c=CMPLX(COS(tr),SIN(tr))
DO lam=1, n2d
DO is=1,npol
d=CONJG(psigper(ngper*(is-1)+ig,lam))*c
do n=1, mdim
do kz=1, nz1
DO n=1, mdim
DO kz=1, nz1
w(kz,lam,npol*(iorb1-2+n)+is)= &
w(kz,lam,npol*(iorb1-2+n)+is)+d*w0(kz,ig,n)
enddo
enddo
enddo
enddo
enddo
endif
enddo
endif
enddo
do iorb=1, norbnow*npol
ENDDO
ENDDO
ENDDO
ENDDO
ENDDO
ENDIF
ENDDO
ENDIF
ENDDO
DO iorb=1, norbnow*npol
iorba=iorb
if (npol.eq.2) iorba=(iorb+1)/2
IF (npol.EQ.2) iorba=(iorb+1)/2
iorbs = orbin + iorba
if (cross(iorbs,k).eq.1) then
do lam=1, n2d
call setint(w(1,lam,iorb),cix(1,lam,iorb),dix(1,lam,iorb), &
IF (cross(iorbs,k).EQ.1) THEN
DO lam=1, n2d
CALL setint(w(1,lam,iorb),cix(1,lam,iorb),dix(1,lam,iorb), &
ezk1(1,lam), emzk1(1,lam), nz1)
enddo
endif
enddo
do iorb=1, norbnow*npol
ENDDO
ENDIF
ENDDO
DO iorb=1, norbnow*npol
iorba=iorb
if (npol.eq.2) iorba=(iorb+1)/2
IF (npol.EQ.2) iorba=(iorb+1)/2
iorbs = orbin + iorba
if (cross(iorbs,k).eq.1) then
do lam=1, n2d
IF (cross(iorbs,k).EQ.1) THEN
DO lam=1, n2d
ci(iorb,lam,kkz)=int1d(w(1,lam,iorb), &
zk(lam,kkz),dz,dz1,nz1,tpiba,1)
di(iorb,lam,kkz)=int1d(w(1,lam,iorb), &
zk(lam,kkz),dz,dz1,nz1,tpiba,-1)
enddo
do iorb1=1, norbnow*npol
ENDDO
DO iorb1=1, norbnow*npol
iorb1a=iorb1
if (npol.eq.2) iorb1a=(iorb1+1)/2
IF (npol.EQ.2) iorb1a=(iorb1+1)/2
iorb1s = orbin + iorb1a
if (cross(iorb1s,k).eq.1) then
do lam=1, n2d
IF (cross(iorb1s,k).EQ.1) THEN
DO lam=1, n2d
intw2(iorb,iorb1)=intw2(iorb,iorb1)- &
int2d(w(1,lam,iorb),w(1,lam,iorb1),cix(1,lam,iorb1), &
dix(1,lam,iorb1),ezk1(1,lam),emzk1(1,lam), &
zk(lam,kkz),dz1,tpiba,nz1)*zk2(lam,kkz)
enddo
endif
enddo
endif
enddo
enddo
ENDDO
ENDIF
ENDDO
ENDIF
ENDDO
ENDDO
call stop_clock('integrals')
CALL stop_clock('integrals')
deallocate(psigper)
deallocate(cix)
deallocate(dix)
deallocate(w)
deallocate(w0)
deallocate(inslab)
DEALLOCATE(psigper)
DEALLOCATE(cix)
DEALLOCATE(dix)
DEALLOCATE(w)
DEALLOCATE(w0)
DEALLOCATE(inslab)
!-----------------------------------
!
! Some allocation for iterative process
!
allocate( bf( n2d, n2d ) )
allocate( an( n2d, n2d ) )
allocate( bn( n2d, n2d ) )
allocate( app( n2d, n2d ) )
allocate( bpp( n2d, n2d ) )
allocate( al( n2d, n2d ) )
allocate( bl( n2d, n2d ) )
allocate( af( n2d, n2d ) )
allocate( s1m( n2d, n2d ) )
allocate( s2m( n2d, n2d ) )
allocate( s3m( n2d, n2d ) )
allocate( s4m( n2d, n2d ) )
allocate( bnlf( n2d, norbnow*npol ) )
allocate( anln( n2d, norbnow*npol ) )
allocate( bnln( n2d, norbnow*npol ) )
allocate( anlp( n2d, norbnow*npol ) )
allocate( bnlp( n2d, norbnow*npol ) )
allocate( anll( n2d, norbnow*npol ) )
allocate( ff( n2d, norbnow*npol ) )
allocate( fl( n2d, norbnow*npol ) )
ALLOCATE( bf( n2d, n2d ) )
ALLOCATE( an( n2d, n2d ) )
ALLOCATE( bn( n2d, n2d ) )
ALLOCATE( app( n2d, n2d ) )
ALLOCATE( bpp( n2d, n2d ) )
ALLOCATE( al( n2d, n2d ) )
ALLOCATE( bl( n2d, n2d ) )
ALLOCATE( af( n2d, n2d ) )
ALLOCATE( s1m( n2d, n2d ) )
ALLOCATE( s2m( n2d, n2d ) )
ALLOCATE( s3m( n2d, n2d ) )
ALLOCATE( s4m( n2d, n2d ) )
ALLOCATE( bnlf( n2d, norbnow*npol ) )
ALLOCATE( anln( n2d, norbnow*npol ) )
ALLOCATE( bnln( n2d, norbnow*npol ) )
ALLOCATE( anlp( n2d, norbnow*npol ) )
ALLOCATE( bnlp( n2d, norbnow*npol ) )
ALLOCATE( anll( n2d, norbnow*npol ) )
ALLOCATE( ff( n2d, norbnow*npol ) )
ALLOCATE( fl( n2d, norbnow*npol ) )
!
! We set up the starting values
@ -279,256 +280,256 @@ implicit none
ff=(0.d0,0.d0)
fl=(0.d0,0.d0)
do lam=1, n2d
DO lam=1, n2d
bf(lam,lam)=(1.d0,0.d0)
bpp(lam,lam)=(1.d0,0.d0)
enddo
ENDDO
!
! To compute intw1, ff, fl for the first slab
!
kz=nkofz(startk)
do iorb=1, norbnow*npol
DO iorb=1, norbnow*npol
iorba=iorb
if (npol.eq.2) iorba=(iorb+1)/2
IF (npol.EQ.2) iorba=(iorb+1)/2
iorbs = orbin + iorba
if (cross(iorbs, startk).eq.1) then
do lam=1, n2d
IF (cross(iorbs, startk).EQ.1) THEN
DO lam=1, n2d
intw1(iorb,lam)=di(iorb, lam, kz)
arg=ezk(lam, kz)
if (abs(DIMAG(zk(lam, kz))).lt.eps) then
IF (ABS(DIMAG(zk(lam, kz))).LT.eps) THEN
ff(lam,iorb)=-arg*CONJG(di(iorb,lam,kz))*zk2(lam,kz)
fl(lam,iorb)=-arg*CONJG(ci(iorb,lam,kz))*zk2(lam,kz)
else
ELSE
ff(lam,iorb)=-CONJG(ci(iorb,lam,kz))*zk2(lam,kz)
fl(lam,iorb)=-CONJG(di(iorb,lam,kz))*zk2(lam,kz)
endif
enddo
endif
enddo
ENDIF
ENDDO
ENDIF
ENDDO
!------------------------------------
! The main loop over slabs
!
do k=startk+1, lastk
call start_clock('scatter')
DO k=startk+1, lastk
CALL start_clock('scatter')
kz=nkofz(k)
do iorb=1, norbnow*npol
DO iorb=1, norbnow*npol
iorba=iorb
if (npol.eq.2) iorba=(iorb+1)/2
IF (npol.EQ.2) iorba=(iorb+1)/2
iorbs = orbin + iorba
tr=taunew(3,iorbs)-rsph(nbnew(iorbs),itnew(iorbs))
if (z(k)+dz.gt.tr) nok=iorb
enddo
do lam=1, n2d
do lam1=1,n2d
IF (z(k)+dz.GT.tr) nok=iorb
ENDDO
DO lam=1, n2d
DO lam1=1,n2d
c=ZDOTC(n2d,psiper(1,lam,kz),1,psiper(1,lam1,kz-1),1)
s1m(lam,lam1)=(zk(lam,kz)+zk(lam1,kz-1))/zk(lam,kz)*c
s2m(lam,lam1)=(zk(lam,kz)-zk(lam1,kz-1))/zk(lam,kz)*c
c=ezk(lam1,kz-1)
s3m(lam,lam1)=s1m(lam,lam1)*c
s4m(lam,lam1)=s2m(lam,lam1)*c
enddo
enddo
call ZGEMM('n','n',n2d,n2d,n2d,one,s3m,n2d,app,n2d,one,an,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,s4m,n2d,app,n2d,one,bn,n2d)
ENDDO
ENDDO
CALL ZGEMM('n','n',n2d,n2d,n2d,one,s3m,n2d,app,n2d,one,an,n2d)
CALL ZGEMM('n','n',n2d,n2d,n2d,one,s4m,n2d,app,n2d,one,bn,n2d)
an= an+s2m
bn= bn+s1m
call ZGEMM('n','n',n2d,nok,n2d,one,s3m,n2d,anlp,n2d,one,anln,n2d)
call ZGEMM('n','n',n2d,nok,n2d,one,s1m,n2d,fl,n2d,one,anln,n2d)
call ZGEMM('n','n',n2d,nok,n2d,one,s4m,n2d,anlp,n2d,one,bnln,n2d)
call ZGEMM('n','n',n2d,nok,n2d,one,s2m,n2d,fl,n2d,one,bnln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s3m,n2d,anlp,n2d,one,anln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s1m,n2d,fl,n2d,one,anln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s4m,n2d,anlp,n2d,one,bnln,n2d)
CALL ZGEMM('n','n',n2d,nok,n2d,one,s2m,n2d,fl,n2d,one,bnln,n2d)
an=an*0.5d0
bn=bn*0.5d0
anln=anln*0.5d0
bnln=bnln*0.5d0
do lam=1, n2d
do n=1, n2d
DO lam=1, n2d
DO n=1, n2d
bn(lam,n)=bn(lam,n)*emzk(lam,kz)
enddo
do iorb=1, nok
ENDDO
DO iorb=1, nok
bnln(lam,iorb)=bnln(lam,iorb)*emzk(lam,kz)
enddo
enddo
call DCOPY(2*n2d*n2d, an, 1, app, 1)
call DCOPY(2*n2d*n2d, bn, 1, bpp, 1)
ENDDO
ENDDO
CALL DCOPY(2*n2d*n2d, an, 1, app, 1)
CALL DCOPY(2*n2d*n2d, bn, 1, bpp, 1)
an=(0.d0,0.d0)
bn=(0.d0,0.d0)
call DCOPY(2*norbnow*npol*n2d, anln, 1, anlp, 1)
call DCOPY(2*norbnow*npol*n2d, bnln, 1, bnlp, 1)
CALL DCOPY(2*norbnow*npol*n2d, anln, 1, anlp, 1)
CALL DCOPY(2*norbnow*npol*n2d, bnln, 1, bnlp, 1)
anln=(0.d0,0.d0)
bnln=(0.d0,0.d0)
fl=(0.d0,0.d0)
!
do iorb=1, norbnow*npol
DO iorb=1, norbnow*npol
iorba=iorb
if (npol.eq.2) iorba=(iorb+1)/2
IF (npol.EQ.2) iorba=(iorb+1)/2
iorbs = orbin + iorba
if(cross(iorbs, k).eq.1) then
do lam=1, n2d
IF(cross(iorbs, k).EQ.1) THEN
DO lam=1, n2d
arg=ezk(lam,kz)
do n=1, n2d
DO n=1, n2d
intw1(iorb,n)=intw1(iorb,n)+app(lam,n)*ci(iorb,lam,kz)+ &
bpp(lam,n)*di(iorb,lam,kz)
enddo
if (abs(DIMAG(zk(lam,kz))).lt.eps) then
ENDDO
IF (ABS(DIMAG(zk(lam,kz))).LT.eps) THEN
f1p=-arg*CONJG(di(iorb,lam,kz))*zk2(lam,kz)
fl(lam,iorb)=-arg*CONJG(ci(iorb,lam,kz))*zk2(lam,kz)
else
ELSE
f1p=-CONJG(ci(iorb,lam,kz))*zk2(lam,kz)
fl(lam,iorb)=-CONJG(di(iorb,lam,kz))*zk2(lam,kz)
endif
ENDIF
bnlp(lam,iorb)=bnlp(lam,iorb)-f1p*emzk(lam,kz)
enddo
endif
enddo
do iorb=1, norbnow*npol
ENDDO
ENDIF
ENDDO
DO iorb=1, norbnow*npol
iorba=iorb
if (npol.eq.2) iorba=(iorb+1)/2
IF (npol.EQ.2) iorba=(iorb+1)/2
iorbs = orbin + iorba
if(cross(iorbs, k).eq.1) then
do iorb1=1, norbnow*npol
IF(cross(iorbs, k).EQ.1) THEN
DO iorb1=1, norbnow*npol
iorb1a=iorb1
if (npol.eq.2) iorb1a=(iorb1+1)/2
IF (npol.EQ.2) iorb1a=(iorb1+1)/2
iorb1s = orbin + iorb1a
tr=taunew(3,iorb1s)-rsph(nbnew(iorb1s),itnew(iorb1s))
if (z(k)+dz.gt.tr) then
IF (z(k)+dz.GT.tr) THEN
c=(0.d0, 0.d0)
do lam=1, n2d
DO lam=1, n2d
c=c+anlp(lam,iorb1)*ci(iorb,lam,kz)+ &
bnlp(lam,iorb1)*di(iorb,lam,kz)
enddo
ENDDO
intw2(iorb,iorb1)=intw2(iorb,iorb1)+c
endif
enddo
endif
enddo
ENDIF
ENDDO
ENDIF
ENDDO
!
! Rotation of linear solutions
!
call stop_clock('scatter')
CALL stop_clock('scatter')
call rotatef(app, bpp, bf, anlp, bnlp, bnlf, intw1, intw2, &
CALL rotatef(app, bpp, bf, anlp, bnlp, bnlf, intw1, intw2, &
n2d, norbf, norbnow, npol)
!write(6,*) 'done k', k
enddo
ENDDO
!---------------------------------------------
call DCOPY(2*n2d*n2d, app, 1, al, 1)
CALL DCOPY(2*n2d*n2d, app, 1, al, 1)
!
! To compute the 2nd half of linear solutions
!
call scatter_back(app, bpp, an, bn, af, ci, di, ezk, emzk, &
CALL scatter_back(app, bpp, an, bn, af, ci, di, ezk, emzk, &
s1m, s2m, s3m, s4m, startk, lastk, &
norbnow, orbin, dz)
call DCOPY(2*n2d*n2d, bpp, 1, bl, 1)
call DCOPY(2*n2d*norbnow*npol, anlp, 1, anll, 1)
call DSCAL(2*norbf*npol*2*n2d, sarea, intw1, 1)
call DSCAL(2*norbf*npol*norbf*npol, sarea, intw2, 1)
CALL DCOPY(2*n2d*n2d, bpp, 1, bl, 1)
CALL DCOPY(2*n2d*norbnow*npol, anlp, 1, anll, 1)
CALL DSCAL(2*norbf*npol*2*n2d, sarea, intw1, 1)
CALL DSCAL(2*norbf*npol*norbf*npol, sarea, intw2, 1)
!
! To construct functions and derivetives on the boundaries
!
! local solutions
allocate( s5m( n2d, n2d ) )
allocate( s6m( n2d, n2d ) )
allocate( s7m( n2d, n2d ) )
allocate( s8m( n2d, n2d ) )
ALLOCATE( s5m( n2d, n2d ) )
ALLOCATE( s6m( n2d, n2d ) )
ALLOCATE( s7m( n2d, n2d ) )
ALLOCATE( s8m( n2d, n2d ) )
! fun0=(0.d0,0.d0)
! fun1=(0.d0,0.d0)
! fund0=(0.d0,0.d0)
! fund1=(0.d0,0.d0)
k=nkofz(startk)
kz=nkofz(lastk)
do n=1, n2d
do lam=1, n2d
DO n=1, n2d
DO lam=1, n2d
s1m(lam,n)=bf(lam,n)*ezk(lam,k)
s2m(lam,n)=al(lam,n)*ezk(lam,kz)
if (lam.eq.n) s2m(lam,n)=s2m(lam,n)+(1.d0,0.d0)
IF (lam.EQ.n) s2m(lam,n)=s2m(lam,n)+(1.d0,0.d0)
s3m(lam,n)=-cim*zk(lam,k)*s1m(lam,n)*tpiba
s4m(lam,n)=cim*zk(lam,kz)*s2m(lam,n)*tpiba
if (lam.eq.n) s4m(lam,n)=s4m(lam,n)-2.d0*cim*zk(lam,kz)*tpiba
IF (lam.EQ.n) s4m(lam,n)=s4m(lam,n)-2.d0*cim*zk(lam,kz)*tpiba
s5m(lam,n)=bl(lam,n)*ezk(lam,k)
if (lam.eq.n) s5m(lam,n)=s5m(lam,n)+(1.d0,0.d0)
IF (lam.EQ.n) s5m(lam,n)=s5m(lam,n)+(1.d0,0.d0)
s6m(lam,n)=af(lam,n)*ezk(lam,kz)
s7m(lam,n)=-cim*zk(lam,k)*s5m(lam,n)*tpiba
if (lam.eq.n) s7m(lam,n)=s7m(lam,n)+2.d0*cim*zk(lam,k)*tpiba
IF (lam.EQ.n) s7m(lam,n)=s7m(lam,n)+2.d0*cim*zk(lam,k)*tpiba
s8m(lam,n)=cim*zk(lam,kz)*s6m(lam,n)*tpiba
enddo
enddo
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s1m,n2d,zero,fun0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s2m,n2d,zero,fun1,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s3m,n2d,zero,fund0,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s4m,n2d,zero,fund1,n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s5m,n2d,zero, &
ENDDO
ENDDO
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s1m,n2d,zero,fun0,n2d)
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s2m,n2d,zero,fun1,n2d)
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s3m,n2d,zero,fund0,n2d)
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s4m,n2d,zero,fund1,n2d)
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s5m,n2d,zero, &
fun0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s6m,n2d,zero, &
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s6m,n2d,zero, &
fun1(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s7m,n2d,zero, &
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,k),n2d,s7m,n2d,zero, &
fund0(1,n2d+1),n2d)
call ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s8m,n2d,zero, &
CALL ZGEMM('n','n',n2d,n2d,n2d,one,psiper(1,1,kz),n2d,s8m,n2d,zero, &
fund1(1,n2d+1),n2d)
! nonlocal solutions
funl0=(0.d0,0.d0)
funl1=(0.d0,0.d0)
fundl0=(0.d0,0.d0)
fundl1=(0.d0,0.d0)
do iorb=1, norbnow*npol
do lam=1, n2d
DO iorb=1, norbnow*npol
DO lam=1, n2d
s1=ff(lam,iorb)+bnlf(lam,iorb)*ezk(lam,k)
s2=fl(lam,iorb)+anll(lam,iorb)*ezk(lam,kz)
s3=-cim*zk(lam,k)*s1*tpiba
s4=cim*zk(lam,kz)*s2*tpiba
do ig=1, n2d
DO ig=1, n2d
funl0(ig,iorb)=funl0(ig,iorb)+psiper(ig,lam,k)*s1
funl1(ig,iorb)=funl1(ig,iorb)+psiper(ig,lam,kz)*s2
fundl0(ig,iorb)=fundl0(ig,iorb)+psiper(ig,lam,k)*s3
fundl1(ig,iorb)=fundl1(ig,iorb)+psiper(ig,lam,kz)*s4
enddo
enddo
enddo
ENDDO
ENDDO
ENDDO
deallocate(ci)
deallocate(di)
deallocate(bf)
deallocate(an)
deallocate(bn)
deallocate(app)
deallocate(bpp)
deallocate(al)
deallocate(bl)
deallocate(af)
deallocate(bnlf)
deallocate(anln)
deallocate(bnln)
deallocate(anlp)
deallocate(bnlp)
deallocate(anll)
deallocate(ff)
deallocate(fl)
deallocate(s1m)
deallocate(s2m)
deallocate(s3m)
deallocate(s4m)
deallocate(s5m)
deallocate(s6m)
deallocate(s7m)
deallocate(s8m)
deallocate(ezk)
deallocate(emzk)
deallocate(ezk1)
deallocate(emzk1)
deallocate(zk2)
DEALLOCATE(ci)
DEALLOCATE(di)
DEALLOCATE(bf)
DEALLOCATE(an)
DEALLOCATE(bn)
DEALLOCATE(app)
DEALLOCATE(bpp)
DEALLOCATE(al)
DEALLOCATE(bl)
DEALLOCATE(af)
DEALLOCATE(bnlf)
DEALLOCATE(anln)
DEALLOCATE(bnln)
DEALLOCATE(anlp)
DEALLOCATE(bnlp)
DEALLOCATE(anll)
DEALLOCATE(ff)
DEALLOCATE(fl)
DEALLOCATE(s1m)
DEALLOCATE(s2m)
DEALLOCATE(s3m)
DEALLOCATE(s4m)
DEALLOCATE(s5m)
DEALLOCATE(s6m)
DEALLOCATE(s7m)
DEALLOCATE(s8m)
DEALLOCATE(ezk)
DEALLOCATE(emzk)
DEALLOCATE(ezk1)
DEALLOCATE(emzk1)
DEALLOCATE(zk2)
!
! To construct the functions in the whole rigion zin<z<zfin in the
! case of multiparallel running
!
#ifdef __PARA
call rotproc(fun0, fund0, fun1, fund1, funl0, fundl0, funl1, &
CALL rotproc(fun0, fund0, fun1, fund1, funl0, fundl0, funl1, &
fundl1, intw1, intw2, n2d, norbf, norbnow)
#endif
call stop_clock('scatter_forw')
CALL stop_clock('scatter_forw')
return
end subroutine scatter_forw
RETURN
END SUBROUTINE scatter_forw