quantum-espresso/PWCOND/rotproc.f90

!
! Copyright (C) 2003 A. Smogunov 
! This file is distributed under the terms of the
! GNU General Public License. See the file `License'
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
subroutine rotproc (fun0, fund0, fun1, fund1, funl0, fundl0, funl1,  & 
                   fundl1, intw1, intw2, n2d, norbf, norbnow)
!
! This subroutine implements a matching procedure to construct 
! local and nonlocal functions on the whole region from those computed
! by different CPU.
! It works well with 1, 2, 4, 8, 16... CPU.
! The matching scheme with 8 CPU looks like:

! |   1   |   2   |   3   |   4   |   5   |   6   |   7   |   8   |
!         +               +               +               +
! |               |               |               |               | 
!                 +                               +                
! |                               |                               | 
!                                 +                                 
! |                                                               |
!
! So in this case there are 3 matching steps. 
!
#include "machine.h"
  use parameters, only : DP
#ifdef __PARA
  use para
  implicit none 

  include 'mpif.h'

  integer :: k, ig, n, lam, lam1, iorb, iorb1, norbf, norbnow, n2d,  &
             ibound, numb, ninsl, ib, icolor, ikey, new_comm, info
  integer, allocatable :: ipiv(:) 
  complex(kind=DP) :: fun0(n2d, 2*n2d),    & ! phi_n(0) 
                      fund0(n2d, 2*n2d),   & ! phi'_n(0)  
                      fun1(n2d, 2*n2d),    & ! phi_n(d) 
                      fund1(n2d, 2*n2d),   & ! phi'_n(d) 
                      funl0(n2d, norbf),   & ! phi_alpha(0) 
                      fundl0(n2d, norbf),  & ! phi'_alpha(0) 
                      funl1(n2d, norbf),   & ! phi_alpha(d) 
                      fundl1(n2d, norbf),  & ! phi'_alpha(d) 
                      intw1(norbf, 2*n2d), & ! integrals on phi_n 
                      intw2(norbf, norbf)    ! integrals on phi_alpha

  complex(kind=DP), allocatable :: x(:), y(:), amat(:,:), vec(:,:),  &
                               amat_aux(:,:), vec_aux(:,:)

  if(nproc.eq.1) return

  allocate( x( n2d ) )
  allocate( y( n2d ) )    
  allocate( amat( 2*n2d, 2*n2d ) )
  allocate( amat_aux( 2*n2d, 2*n2d ) ) 
  allocate( vec( 2*n2d, 2*n2d+norbnow ) )
  allocate( vec_aux( 2*n2d, 2*n2d+norbnow ) )
  allocate( ipiv( 2*n2d ) )

  numb=0
  ibound=nproc/2
  ninsl=1
  ib=2*me-me/2*2    

  do while(ibound.gt.0) 

!
!   To find the matching coefficients for a group of CPU 
!
    icolor=(ib+ninsl-1)/(2*ninsl)
    ikey=(me+ninsl)-ib               
    call mpi_barrier (MPI_COMM_WORLD, info)      
    call mpi_comm_split(MPI_COMM_WORLD, icolor, ikey, new_comm, info)
    amat=(0.d0,0.d0)
    vec=(0.d0,0.d0)

    if(me.eq.ib) then
      do lam=1, n2d
        do lam1=1, n2d
          amat(lam, n2d+lam1)=-fun0(lam,n2d+lam1)
          amat(n2d+lam, n2d+lam1)=-fund0(lam,n2d+lam1)
          vec(lam, lam1)=fun0(lam, lam1)
          vec(n2d+lam, lam1)=fund0(lam, lam1)
        enddo
        do iorb=1, norbnow
          vec(lam, 2*n2d+iorb)=funl0(lam, iorb)
          vec(n2d+lam, 2*n2d+iorb)=fundl0(lam, iorb)
        enddo
      enddo              
      numb=numb+1
    endif
    if(me.eq.ib-1) then
      do lam=1, n2d
        do lam1=1, n2d
          amat(lam, lam1)=fun1(lam,lam1)
          amat(n2d+lam, lam1)=fund1(lam,lam1)
          vec(lam, n2d+lam1)=-fun1(lam, n2d+lam1)
          vec(n2d+lam, n2d+lam1)=-fund1(lam, n2d+lam1)
        enddo
        do iorb=1, norbnow
          vec(lam, 2*n2d+iorb)=-funl1(lam, iorb)
          vec(n2d+lam, 2*n2d+iorb)=-fundl1(lam, iorb)
        enddo
      enddo                      
      numb=numb+1
    endif
    call mpi_allreduce(amat, amat_aux, 2*2*n2d*2*n2d, MPI_REAL8,     &
                       MPI_SUM, new_comm, info)
    call mpi_allreduce(vec, vec_aux, 2*2*n2d*(2*n2d+norbnow),        &
                       MPI_REAL8, MPI_SUM, new_comm, info)
    call DCOPY(2*2*n2d*2*n2d, amat_aux, 1, amat, 1)  
    call DCOPY(2*2*n2d*(2*n2d+norbnow), vec_aux, 1, vec, 1) 
    call ZGESV(2*n2d, 2*n2d+norbnow, amat, 2*n2d, ipiv,              &
               vec, 2*n2d, info)          
!
!   recalculate the functions for CPU which is left to matching 
!   boundary
!
    if(numb.le.1.and.me/2*2.eq.me) then
     do ig=1, n2d
      do n=1, n2d
       do lam=1, n2d
        fun1(ig, n)=fun1(ig, n)+  &
                           vec(n2d+lam, n)*fun1(ig, n2d+lam)
        fund1(ig, n)=fund1(ig, n)+  &
                           vec(n2d+lam, n)*fund1(ig, n2d+lam)
       enddo
      enddo
      do iorb=1, norbnow
       do lam=1, n2d
        funl1(ig, iorb)=funl1(ig, iorb)+  &
                vec(n2d+lam, 2*n2d+iorb)*fun1(ig, n2d+lam)
        fundl1(ig, iorb)=fundl1(ig, iorb)+  &
                vec(n2d+lam, 2*n2d+iorb)*fund1(ig, n2d+lam)
       enddo
      enddo
     enddo
     do ig=1, n2d
       x=(0.d0,0.d0)
       y=(0.d0,0.d0)
       do n=1, n2d
         do lam=1, n2d
           x(n)=x(n)+vec(n2d+lam, n2d+n)*fun1(ig, n2d+lam)
           y(n)=y(n)+vec(n2d+lam, n2d+n)*fund1(ig, n2d+lam)
         enddo
       enddo
       do n=1, n2d
         fun1(ig, n2d+n)=x(n)
         fund1(ig, n2d+n)=y(n)
       enddo
     enddo                                   
    endif
!
!   recalculate the functions for CPU which is right to matching
!   boundary  
!
    if(numb.le.1.and.me/2*2.ne.me) then
     do ig=1, n2d
      do n=1, n2d
       do lam=1, n2d
        fun0(ig, n2d+n)=fun0(ig, n2d+n)+  &
                           vec(lam, n2d+n)*fun0(ig, lam)
        fund0(ig, n2d+n)=fund0(ig, n2d+n)+  &
                           vec(lam, n2d+n)*fund0(ig, lam)
       enddo
      enddo
      do iorb=1, norbnow
       do lam=1, n2d
        funl0(ig, iorb)=funl0(ig, iorb)+  &
                     vec(lam, 2*n2d+iorb)*fun0(ig, lam)
        fundl0(ig, iorb)=fundl0(ig, iorb)+  &
                     vec(lam, 2*n2d+iorb)*fund0(ig, lam)
       enddo
      enddo
     enddo
     do ig=1, n2d
       x=(0.d0,0.d0)
       y=(0.d0,0.d0)
       do n=1, n2d
         do lam=1, n2d
           x(n)=x(n)+vec(lam, n)*fun0(ig, lam)
           y(n)=y(n)+vec(lam, n)*fund0(ig, lam)
         enddo
       enddo
       do n=1, n2d
         fun0(ig, n)=x(n)
         fund0(ig, n)=y(n)
       enddo
     enddo                                             
    endif  
!
!  to recalculate the integrals for a given group of CPU
!
    if(me.ge.ib) then
     do iorb=1, norbnow
      do iorb1=1, norbnow
       do lam=1, n2d
        intw2(iorb,iorb1)=intw2(iorb,iorb1)+           &
           vec(n2d+lam, 2*n2d+iorb1)*intw1(iorb, n2d+lam)
       enddo
      enddo
     enddo
     do iorb=1, norbnow
       x=(0.d0,0.d0)
       do n=1, n2d
         do lam=1, n2d
           x(n)=x(n)+vec(n2d+lam, n2d+n)*intw1(iorb, n2d+lam)
           intw1(iorb, n)=intw1(iorb, n)+   &
               vec(n2d+lam, n)*intw1(iorb, n2d+lam)
         enddo
       enddo
       do n=1, n2d
         intw1(iorb, n2d+n)=x(n)
       enddo
     enddo                            
    else
     do iorb=1, norbnow
      do iorb1=1, norbnow
       do lam=1, n2d
        intw2(iorb, iorb1)=intw2(iorb, iorb1)+           &
             vec(lam, 2*n2d+iorb1)*intw1(iorb, lam)
       enddo
      enddo
     enddo
     do iorb=1, norbnow
       x=(0.d0,0.d0)
       do n=1, n2d
         do lam=1, n2d
           x(n)=x(n)+vec(lam, n)*intw1(iorb, lam)
           intw1(iorb, n2d+n)=intw1(iorb, n2d+n)+   &
               vec(lam, n2d+n)*intw1(iorb, lam)
         enddo
       enddo
       do n=1, n2d
         intw1(iorb, n)=x(n)
       enddo
     enddo                                       
    endif 

!
!  to next matching step
!
    n=(ib+ninsl-1)/(2*ninsl)
    if(n/2*2.eq.n) then
      ib=ib-ninsl
    else
      ib=ib+ninsl
    endif
    ninsl=ninsl*2
    ibound=ibound/2              

    call mpi_comm_free(new_comm, info) 
  enddo

!
! Broadcast of the functions and the integrals to all CPU
!
  call mpi_bcast(fun0, 2*n2d*2*n2d, MPI_REAL8,  0,          &
                 MPI_COMM_WORLD, info)
  call mpi_bcast(fund0, 2*n2d*2*n2d, MPI_REAL8, 0,          &
                 MPI_COMM_WORLD, info)   
  call mpi_bcast(funl0, 2*n2d*norbf, MPI_REAL8,  0,         &
                 MPI_COMM_WORLD, info)   
  call mpi_bcast(fundl0, 2*n2d*norbf, MPI_REAL8, 0,         &
                 MPI_COMM_WORLD, info)   

  call mpi_bcast(fun1, 2*n2d*2*n2d, MPI_REAL8,  nproc-1,    &
                 MPI_COMM_WORLD, info)
  call mpi_bcast(fund1, 2*n2d*2*n2d, MPI_REAL8, nproc-1,    &
                 MPI_COMM_WORLD, info)
  call mpi_bcast(funl1, 2*n2d*norbf, MPI_REAL8,  nproc-1,   &
                 MPI_COMM_WORLD, info)
  call mpi_bcast(fundl1, 2*n2d*norbf, MPI_REAL8, nproc-1,   &
                 MPI_COMM_WORLD, info)                         

!
! Gathering of the integrals
!
  call reduce(2*norbf*2*n2d, intw1)
  call reduce(2*norbf*norbf, intw2)

  deallocate(x)
  deallocate(y)
  deallocate(amat)
  deallocate(amat_aux)
  deallocate(vec)
  deallocate(vec_aux)
  deallocate(ipiv)
     
#endif
  return
end subroutine rotproc
Program for complex bands and conductance calculation. git-svn-id: http://qeforge.qe-forge.org/svn/q-e/trunk/espresso@319 c92efa57-630b-4861-b058-cf58834340f0 2003-09-24 18:03:55 +08:00			`!`
			`! Copyright (C) 2003 A. Smogunov`
			`! This file is distributed under the terms of the`
			! GNU General Public License. See the file `License'
			`! in the root directory of the present distribution,`
			`! or http://www.gnu.org/copyleft/gpl.txt .`
			`!`
			`subroutine rotproc (fun0, fund0, fun1, fund1, funl0, fundl0, funl1, &`
			`fundl1, intw1, intw2, n2d, norbf, norbnow)`
			`!`
			`! This subroutine implements a matching procedure to construct`
			`! local and nonlocal functions on the whole region from those computed`
			`! by different CPU.`
			`! It works well with 1, 2, 4, 8, 16... CPU.`
			`! The matching scheme with 8 CPU looks like:`

			`! \| 1 \| 2 \| 3 \| 4 \| 5 \| 6 \| 7 \| 8 \|`
			`! + + + +`
			`! \| \| \| \| \|`
			`! + +`
			`! \| \| \|`
			`! +`
			`! \| \|`
			`!`
			`! So in this case there are 3 matching steps.`
			`!`
			`#include "machine.h"`
			`use parameters, only : DP`
			`#ifdef __PARA`
			`use para`
			`implicit none`

			`include 'mpif.h'`

			`integer :: k, ig, n, lam, lam1, iorb, iorb1, norbf, norbnow, n2d, &`
			`ibound, numb, ninsl, ib, icolor, ikey, new_comm, info`
			`integer, allocatable :: ipiv(:)`
			`complex(kind=DP) :: fun0(n2d, 2*n2d), & ! phi_n(0)`
			`fund0(n2d, 2*n2d), & ! phi'_n(0)`
			`fun1(n2d, 2*n2d), & ! phi_n(d)`
			`fund1(n2d, 2*n2d), & ! phi'_n(d)`
			`funl0(n2d, norbf), & ! phi_alpha(0)`
			`fundl0(n2d, norbf), & ! phi'_alpha(0)`
			`funl1(n2d, norbf), & ! phi_alpha(d)`
			`fundl1(n2d, norbf), & ! phi'_alpha(d)`
			`intw1(norbf, 2*n2d), & ! integrals on phi_n`
			`intw2(norbf, norbf) ! integrals on phi_alpha`

			`complex(kind=DP), allocatable :: x(:), y(:), amat(:,:), vec(:,:), &`
			`amat_aux(:,:), vec_aux(:,:)`

			`if(nproc.eq.1) return`

			`allocate( x( n2d ) )`
			`allocate( y( n2d ) )`
			`allocate( amat( 2n2d, 2n2d ) )`
			`allocate( amat_aux( 2n2d, 2n2d ) )`
			`allocate( vec( 2n2d, 2n2d+norbnow ) )`
			`allocate( vec_aux( 2n2d, 2n2d+norbnow ) )`
			`allocate( ipiv( 2*n2d ) )`

			`numb=0`
			`ibound=nproc/2`
			`ninsl=1`
			`ib=2me-me/22`

			`do while(ibound.gt.0)`

			`!`
			`! To find the matching coefficients for a group of CPU`
			`!`
			`icolor=(ib+ninsl-1)/(2*ninsl)`
			`ikey=(me+ninsl)-ib`
			`call mpi_barrier (MPI_COMM_WORLD, info)`
			`call mpi_comm_split(MPI_COMM_WORLD, icolor, ikey, new_comm, info)`
			`amat=(0.d0,0.d0)`
			`vec=(0.d0,0.d0)`

			`if(me.eq.ib) then`
			`do lam=1, n2d`
			`do lam1=1, n2d`
			`amat(lam, n2d+lam1)=-fun0(lam,n2d+lam1)`
			`amat(n2d+lam, n2d+lam1)=-fund0(lam,n2d+lam1)`
			`vec(lam, lam1)=fun0(lam, lam1)`
			`vec(n2d+lam, lam1)=fund0(lam, lam1)`
			`enddo`
			`do iorb=1, norbnow`
			`vec(lam, 2*n2d+iorb)=funl0(lam, iorb)`
			`vec(n2d+lam, 2*n2d+iorb)=fundl0(lam, iorb)`
			`enddo`
			`enddo`
			`numb=numb+1`
			`endif`
			`if(me.eq.ib-1) then`
			`do lam=1, n2d`
			`do lam1=1, n2d`
			`amat(lam, lam1)=fun1(lam,lam1)`
			`amat(n2d+lam, lam1)=fund1(lam,lam1)`
			`vec(lam, n2d+lam1)=-fun1(lam, n2d+lam1)`
			`vec(n2d+lam, n2d+lam1)=-fund1(lam, n2d+lam1)`
			`enddo`
			`do iorb=1, norbnow`
			`vec(lam, 2*n2d+iorb)=-funl1(lam, iorb)`
			`vec(n2d+lam, 2*n2d+iorb)=-fundl1(lam, iorb)`
			`enddo`
			`enddo`
			`numb=numb+1`
			`endif`
			`call mpi_allreduce(amat, amat_aux, 22n2d2n2d, MPI_REAL8, &`
			`MPI_SUM, new_comm, info)`
			`call mpi_allreduce(vec, vec_aux, 22n2d(2n2d+norbnow), &`
			`MPI_REAL8, MPI_SUM, new_comm, info)`
			`call DCOPY(22n2d2n2d, amat_aux, 1, amat, 1)`
			`call DCOPY(22n2d(2n2d+norbnow), vec_aux, 1, vec, 1)`
			`call ZGESV(2n2d, 2n2d+norbnow, amat, 2*n2d, ipiv, &`
			`vec, 2*n2d, info)`
			`!`
			`! recalculate the functions for CPU which is left to matching`
			`! boundary`
			`!`
			`if(numb.le.1.and.me/2*2.eq.me) then`
			`do ig=1, n2d`
			`do n=1, n2d`
			`do lam=1, n2d`
			`fun1(ig, n)=fun1(ig, n)+ &`
			`vec(n2d+lam, n)*fun1(ig, n2d+lam)`
			`fund1(ig, n)=fund1(ig, n)+ &`
			`vec(n2d+lam, n)*fund1(ig, n2d+lam)`
			`enddo`
			`enddo`
			`do iorb=1, norbnow`
			`do lam=1, n2d`
			`funl1(ig, iorb)=funl1(ig, iorb)+ &`
			`vec(n2d+lam, 2n2d+iorb)fun1(ig, n2d+lam)`
			`fundl1(ig, iorb)=fundl1(ig, iorb)+ &`
			`vec(n2d+lam, 2n2d+iorb)fund1(ig, n2d+lam)`
			`enddo`
			`enddo`
			`enddo`
			`do ig=1, n2d`
			`x=(0.d0,0.d0)`
			`y=(0.d0,0.d0)`
			`do n=1, n2d`
			`do lam=1, n2d`
			`x(n)=x(n)+vec(n2d+lam, n2d+n)*fun1(ig, n2d+lam)`
			`y(n)=y(n)+vec(n2d+lam, n2d+n)*fund1(ig, n2d+lam)`
			`enddo`
			`enddo`
			`do n=1, n2d`
			`fun1(ig, n2d+n)=x(n)`
			`fund1(ig, n2d+n)=y(n)`
			`enddo`
			`enddo`
			`endif`
			`!`
			`! recalculate the functions for CPU which is right to matching`
			`! boundary`
			`!`
			`if(numb.le.1.and.me/2*2.ne.me) then`
			`do ig=1, n2d`
			`do n=1, n2d`
			`do lam=1, n2d`
			`fun0(ig, n2d+n)=fun0(ig, n2d+n)+ &`
			`vec(lam, n2d+n)*fun0(ig, lam)`
			`fund0(ig, n2d+n)=fund0(ig, n2d+n)+ &`
			`vec(lam, n2d+n)*fund0(ig, lam)`
			`enddo`
			`enddo`
			`do iorb=1, norbnow`
			`do lam=1, n2d`
			`funl0(ig, iorb)=funl0(ig, iorb)+ &`
			`vec(lam, 2n2d+iorb)fun0(ig, lam)`
			`fundl0(ig, iorb)=fundl0(ig, iorb)+ &`
			`vec(lam, 2n2d+iorb)fund0(ig, lam)`
			`enddo`
			`enddo`
			`enddo`
			`do ig=1, n2d`
			`x=(0.d0,0.d0)`
			`y=(0.d0,0.d0)`
			`do n=1, n2d`
			`do lam=1, n2d`
			`x(n)=x(n)+vec(lam, n)*fun0(ig, lam)`
			`y(n)=y(n)+vec(lam, n)*fund0(ig, lam)`
			`enddo`
			`enddo`
			`do n=1, n2d`
			`fun0(ig, n)=x(n)`
			`fund0(ig, n)=y(n)`
			`enddo`
			`enddo`
			`endif`
			`!`
			`! to recalculate the integrals for a given group of CPU`
			`!`
			`if(me.ge.ib) then`
			`do iorb=1, norbnow`
			`do iorb1=1, norbnow`
			`do lam=1, n2d`
			`intw2(iorb,iorb1)=intw2(iorb,iorb1)+ &`
			`vec(n2d+lam, 2n2d+iorb1)intw1(iorb, n2d+lam)`
			`enddo`
			`enddo`
			`enddo`
			`do iorb=1, norbnow`
			`x=(0.d0,0.d0)`
			`do n=1, n2d`
			`do lam=1, n2d`
			`x(n)=x(n)+vec(n2d+lam, n2d+n)*intw1(iorb, n2d+lam)`
			`intw1(iorb, n)=intw1(iorb, n)+ &`
			`vec(n2d+lam, n)*intw1(iorb, n2d+lam)`
			`enddo`
			`enddo`
			`do n=1, n2d`
			`intw1(iorb, n2d+n)=x(n)`
			`enddo`
			`enddo`
			`else`
			`do iorb=1, norbnow`
			`do iorb1=1, norbnow`
			`do lam=1, n2d`
			`intw2(iorb, iorb1)=intw2(iorb, iorb1)+ &`
			`vec(lam, 2n2d+iorb1)intw1(iorb, lam)`
			`enddo`
			`enddo`
			`enddo`
			`do iorb=1, norbnow`
			`x=(0.d0,0.d0)`
			`do n=1, n2d`
			`do lam=1, n2d`
			`x(n)=x(n)+vec(lam, n)*intw1(iorb, lam)`
			`intw1(iorb, n2d+n)=intw1(iorb, n2d+n)+ &`
			`vec(lam, n2d+n)*intw1(iorb, lam)`
			`enddo`
			`enddo`
			`do n=1, n2d`
			`intw1(iorb, n)=x(n)`
			`enddo`
			`enddo`
			`endif`

			`!`
			`! to next matching step`
			`!`
			`n=(ib+ninsl-1)/(2*ninsl)`
			`if(n/2*2.eq.n) then`
			`ib=ib-ninsl`
			`else`
			`ib=ib+ninsl`
			`endif`
			`ninsl=ninsl*2`
			`ibound=ibound/2`

			`call mpi_comm_free(new_comm, info)`
			`enddo`

			`!`
			`! Broadcast of the functions and the integrals to all CPU`
			`!`
			`call mpi_bcast(fun0, 2n2d2*n2d, MPI_REAL8, 0, &`
			`MPI_COMM_WORLD, info)`
			`call mpi_bcast(fund0, 2n2d2*n2d, MPI_REAL8, 0, &`
			`MPI_COMM_WORLD, info)`
			`call mpi_bcast(funl0, 2n2dnorbf, MPI_REAL8, 0, &`
			`MPI_COMM_WORLD, info)`
			`call mpi_bcast(fundl0, 2n2dnorbf, MPI_REAL8, 0, &`
			`MPI_COMM_WORLD, info)`

			`call mpi_bcast(fun1, 2n2d2*n2d, MPI_REAL8, nproc-1, &`
			`MPI_COMM_WORLD, info)`
			`call mpi_bcast(fund1, 2n2d2*n2d, MPI_REAL8, nproc-1, &`
			`MPI_COMM_WORLD, info)`
			`call mpi_bcast(funl1, 2n2dnorbf, MPI_REAL8, nproc-1, &`
			`MPI_COMM_WORLD, info)`
			`call mpi_bcast(fundl1, 2n2dnorbf, MPI_REAL8, nproc-1, &`
			`MPI_COMM_WORLD, info)`

			`!`
			`! Gathering of the integrals`
			`!`
			`call reduce(2norbf2*n2d, intw1)`
			`call reduce(2norbfnorbf, intw2)`

			`deallocate(x)`
			`deallocate(y)`
			`deallocate(amat)`
			`deallocate(amat_aux)`
			`deallocate(vec)`
			`deallocate(vec_aux)`
			`deallocate(ipiv)`

			`#endif`
			`return`
			`end subroutine rotproc`