Additional para at the level of ephwan2blochp.

This commit is contained in:
Samuel Ponce 2018-08-21 17:25:50 +01:00
parent 18e8f76c2c
commit acdce530c9
1 changed files with 42 additions and 22 deletions

View File

@ -1069,6 +1069,8 @@
!
INTEGER :: ir
!! Real space WS index
INTEGER :: irn
!! Combined WS and atom index
INTEGER :: ir_start
!! Starting ir for this cores
INTEGER :: ir_stop
@ -1091,10 +1093,10 @@
!
REAL(kind=DP) :: rdotk
!! Exponential for the FT
!
!
COMPLEX(kind=DP) :: eptmp( nbnd, nbnd, nrr_k, nmodes)
!! Temporary matrix to store el-ph
COMPLEX(kind=DP) :: cfac(nat, nrr_g)
COMPLEX(kind=DP) :: cfac(nat,nrr_g)
!! Factor for the FT
COMPLEX(kind=DP), ALLOCATABLE :: epmatw( :,:,:,:)
!! El-ph matrix elements
@ -1111,46 +1113,60 @@
! g~(R_e,q') is epmatf(nmodes, nmodes, ik )
! every pool works with its own subset of k points on the fine grid
!
CALL para_bounds(ir_start, ir_stop, nrr_g)
! SP: Because nrr_g can be quite small, we do a combined parallelization
! on WS vector and atoms
CALL para_bounds(ir_start, ir_stop, nrr_g * nat)
!
eptmp(:,:,:,:) = czero
cfac(:,:) = czero
!
DO ir = ir_start, ir_stop
DO irn = ir_start, ir_stop
ir = (irn-1)/nat + 1
na = MOD(irn-1,nat) +1
!
! note xxq is assumed to be already in cryst coord
!
rdotk = twopi * dot_product ( xxq, dble(irvec_g(:, ir)) )
DO na = 1, nat
IF (ndegen_g(ir,na) > 0) &
cfac(na,ir) = exp( ci*rdotk ) / dble( ndegen_g(ir,na) )
ENDDO
IF (ndegen_g(ir,na) > 0) &
cfac(na,ir) = exp( ci*rdotk ) / dble( ndegen_g(ir,na) )
ENDDO
!
IF (etf_mem == 0) then
!
DO na = 1, nat
Call zgemv( 'n', nbnd * nbnd * nrr_k * 3, ir_stop - ir_start + 1, cone, &
epmatwp(:,:,:,3*(na-1)+1:3*na,ir_start:ir_stop), nbnd * nbnd * nrr_k * 3, &
cfac(na,ir_start:ir_stop), 1, czero, eptmp(:,:,:,3*(na-1)+1:3*na), 1 )
DO irn = ir_start, ir_stop
ir = (irn-1)/nat + 1
na = MOD(irn-1,nat) + 1
!
!print*,'irn ',irn, shape(cfac), shape(epmatwp(:,:,:,:,:)), 3*(na-1)+1,3*na, ir
CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatwp(:,:,:,3*(na-1)+1:3*na,ir), 1, &
eptmp(:,:,:,3*(na-1)+1:3*na), 1)
!CALL zgemv( 'n', nbnd * nbnd * nrr_k * 3, ir_stop - ir_start + 1, cone, &
! epmatwp(:,:,:,3*(na-1)+1:3*na,ir_start:ir_stop), nbnd * nbnd * nrr_k * 3, &
! cfac(ir_start:ir_stop), 1, czero, eptmp(:,:,:,3*(na-1)+1:3*na), 1 )
ENDDO
!
ELSE
!
ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, nmodes))
!ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, 3))
!
!lrepmatw2 = 2 * nbnd * nbnd * nrr_k * nmodes
#if defined(__MPI)
ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, 3))
! Although this should almost never be problematic (see explaination below)
lrepmatw2 = 2_MPI_OFFSET_KIND * INT( nbnd , kind = MPI_OFFSET_KIND ) * &
INT( nbnd , kind = MPI_OFFSET_KIND ) * &
INT( nrr_k , kind = MPI_OFFSET_KIND ) * &
INT( nmodes, kind = MPI_OFFSET_KIND )
3_MPI_OFFSET_KIND
!INT( nmodes, kind = MPI_OFFSET_KIND )
#else
lrepmatw2 = INT( 2 * nbnd * nbnd * nrr_k * nmodes, kind = 8)
ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, nmodes))
lrepmatw2 = INT( 2 * nbnd * nbnd * nrr_k * 3, kind = 8)
!lrepmatw2 = INT( 2 * nbnd * nbnd * nrr_k * nmodes, kind = 8)
#endif
!
DO ir = ir_start, ir_stop
DO irn = ir_start, ir_stop
ir = (irn-1)/nat + 1
na = MOD(irn-1,nat) + 1
#if defined(__MPI)
! DEBUG: print*,'Process ',my_id,' do ',ir,'/ ',ir_stop
!
@ -1163,7 +1179,8 @@
lrepmatw = 2_MPI_OFFSET_KIND * INT( nbnd , kind = MPI_OFFSET_KIND ) * &
INT( nbnd , kind = MPI_OFFSET_KIND ) * &
INT( nrr_k , kind = MPI_OFFSET_KIND ) * &
INT( nmodes, kind = MPI_OFFSET_KIND ) * &
3_MPI_OFFSET_KIND * &
!INT( nmodes, kind = MPI_OFFSET_KIND ) * &
8_MPI_OFFSET_KIND * ( INT( ir , kind = MPI_OFFSET_KIND ) - 1_MPI_OFFSET_KIND )
!
! SP: mpi seek is used to set the position at which we should start
@ -1177,15 +1194,17 @@
!CALL MPI_FILE_READ(iunepmatwp2, aux, lrepmatw2, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE,ierr)
CALL MPI_FILE_READ(iunepmatwp2, epmatw, lrepmatw2, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE,ierr)
IF( ierr /= 0 ) CALL errore( 'ephwan2blochp', 'error in MPI_FILE_READ_ALL',1 )
!
CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatw(:,:,:,:), 1, &
eptmp(:,:,:,3*(na-1)+1:3*na), 1)
!
#else
CALL rwepmatw ( epmatw, nbnd, nrr_k, nmodes, ir, iunepmatwp, -1)
#endif
!
DO na = 1, nat
CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatw(:,:,:,3*(na-1)+1:3*na), 1, &
eptmp(:,:,:,3*(na-1)+1:3*na), 1)
ENDDO
CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatw(:,:,:,3*(na-1)+1:3*na), 1, &
eptmp(:,:,:,3*(na-1)+1:3*na), 1)
#endif
!
ENDDO
DEALLOCATE(epmatw)
@ -1204,6 +1223,7 @@
!
Call zgemm( 'n', 'n', nbnd * nbnd * nrr_k, nmodes, nmodes, cone, eptmp, &
nbnd * nbnd * nrr_k, cuf, nmodes, czero, epmatf, nbnd * nbnd * nrr_k )
!
CALL stop_clock('ephW2Bp')
!