From acdce530c99a6d5906023bd46946e735b29f6a5b Mon Sep 17 00:00:00 2001 From: Samuel Ponce Date: Tue, 21 Aug 2018 17:25:50 +0100 Subject: [PATCH] Additional para at the level of ephwan2blochp. --- EPW/src/wan2bloch.f90 | 64 ++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/EPW/src/wan2bloch.f90 b/EPW/src/wan2bloch.f90 index 63c1aacac..dae33bcfb 100644 --- a/EPW/src/wan2bloch.f90 +++ b/EPW/src/wan2bloch.f90 @@ -1069,6 +1069,8 @@ ! INTEGER :: ir !! Real space WS index + INTEGER :: irn + !! Combined WS and atom index INTEGER :: ir_start !! Starting ir for this cores INTEGER :: ir_stop @@ -1091,10 +1093,10 @@ ! REAL(kind=DP) :: rdotk !! Exponential for the FT - ! + ! COMPLEX(kind=DP) :: eptmp( nbnd, nbnd, nrr_k, nmodes) !! Temporary matrix to store el-ph - COMPLEX(kind=DP) :: cfac(nat, nrr_g) + COMPLEX(kind=DP) :: cfac(nat,nrr_g) !! Factor for the FT COMPLEX(kind=DP), ALLOCATABLE :: epmatw( :,:,:,:) !! El-ph matrix elements @@ -1111,46 +1113,60 @@ ! g~(R_e,q') is epmatf(nmodes, nmodes, ik ) ! every pool works with its own subset of k points on the fine grid ! - CALL para_bounds(ir_start, ir_stop, nrr_g) + ! SP: Because nrr_g can be quite small, we do a combined parallelization + ! on WS vector and atoms + CALL para_bounds(ir_start, ir_stop, nrr_g * nat) ! eptmp(:,:,:,:) = czero cfac(:,:) = czero ! - DO ir = ir_start, ir_stop + DO irn = ir_start, ir_stop + ir = (irn-1)/nat + 1 + na = MOD(irn-1,nat) +1 ! ! note xxq is assumed to be already in cryst coord ! rdotk = twopi * dot_product ( xxq, dble(irvec_g(:, ir)) ) - DO na = 1, nat - IF (ndegen_g(ir,na) > 0) & - cfac(na,ir) = exp( ci*rdotk ) / dble( ndegen_g(ir,na) ) - ENDDO + IF (ndegen_g(ir,na) > 0) & + cfac(na,ir) = exp( ci*rdotk ) / dble( ndegen_g(ir,na) ) ENDDO ! IF (etf_mem == 0) then ! - DO na = 1, nat - Call zgemv( 'n', nbnd * nbnd * nrr_k * 3, ir_stop - ir_start + 1, cone, & - epmatwp(:,:,:,3*(na-1)+1:3*na,ir_start:ir_stop), nbnd * nbnd * nrr_k * 3, & - cfac(na,ir_start:ir_stop), 1, czero, eptmp(:,:,:,3*(na-1)+1:3*na), 1 ) + DO irn = ir_start, ir_stop + ir = (irn-1)/nat + 1 + na = MOD(irn-1,nat) + 1 + ! + !print*,'irn ',irn, shape(cfac), shape(epmatwp(:,:,:,:,:)), 3*(na-1)+1,3*na, ir + CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatwp(:,:,:,3*(na-1)+1:3*na,ir), 1, & + eptmp(:,:,:,3*(na-1)+1:3*na), 1) + !CALL zgemv( 'n', nbnd * nbnd * nrr_k * 3, ir_stop - ir_start + 1, cone, & + ! epmatwp(:,:,:,3*(na-1)+1:3*na,ir_start:ir_stop), nbnd * nbnd * nrr_k * 3, & + ! cfac(ir_start:ir_stop), 1, czero, eptmp(:,:,:,3*(na-1)+1:3*na), 1 ) ENDDO ! ELSE ! - ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, nmodes)) + !ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, 3)) ! !lrepmatw2 = 2 * nbnd * nbnd * nrr_k * nmodes #if defined(__MPI) + ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, 3)) ! Although this should almost never be problematic (see explaination below) lrepmatw2 = 2_MPI_OFFSET_KIND * INT( nbnd , kind = MPI_OFFSET_KIND ) * & INT( nbnd , kind = MPI_OFFSET_KIND ) * & INT( nrr_k , kind = MPI_OFFSET_KIND ) * & - INT( nmodes, kind = MPI_OFFSET_KIND ) + 3_MPI_OFFSET_KIND + !INT( nmodes, kind = MPI_OFFSET_KIND ) #else - lrepmatw2 = INT( 2 * nbnd * nbnd * nrr_k * nmodes, kind = 8) + ALLOCATE(epmatw ( nbnd, nbnd, nrr_k, nmodes)) + lrepmatw2 = INT( 2 * nbnd * nbnd * nrr_k * 3, kind = 8) + !lrepmatw2 = INT( 2 * nbnd * nbnd * nrr_k * nmodes, kind = 8) #endif ! - DO ir = ir_start, ir_stop + DO irn = ir_start, ir_stop + ir = (irn-1)/nat + 1 + na = MOD(irn-1,nat) + 1 #if defined(__MPI) ! DEBUG: print*,'Process ',my_id,' do ',ir,'/ ',ir_stop ! @@ -1163,7 +1179,8 @@ lrepmatw = 2_MPI_OFFSET_KIND * INT( nbnd , kind = MPI_OFFSET_KIND ) * & INT( nbnd , kind = MPI_OFFSET_KIND ) * & INT( nrr_k , kind = MPI_OFFSET_KIND ) * & - INT( nmodes, kind = MPI_OFFSET_KIND ) * & + 3_MPI_OFFSET_KIND * & + !INT( nmodes, kind = MPI_OFFSET_KIND ) * & 8_MPI_OFFSET_KIND * ( INT( ir , kind = MPI_OFFSET_KIND ) - 1_MPI_OFFSET_KIND ) ! ! SP: mpi seek is used to set the position at which we should start @@ -1177,15 +1194,17 @@ !CALL MPI_FILE_READ(iunepmatwp2, aux, lrepmatw2, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE,ierr) CALL MPI_FILE_READ(iunepmatwp2, epmatw, lrepmatw2, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE,ierr) IF( ierr /= 0 ) CALL errore( 'ephwan2blochp', 'error in MPI_FILE_READ_ALL',1 ) + ! + CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatw(:,:,:,:), 1, & + eptmp(:,:,:,3*(na-1)+1:3*na), 1) ! #else CALL rwepmatw ( epmatw, nbnd, nrr_k, nmodes, ir, iunepmatwp, -1) -#endif ! - DO na = 1, nat - CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatw(:,:,:,3*(na-1)+1:3*na), 1, & - eptmp(:,:,:,3*(na-1)+1:3*na), 1) - ENDDO + CALL ZAXPY(nbnd * nbnd * nrr_k * 3, cfac(na,ir), epmatw(:,:,:,3*(na-1)+1:3*na), 1, & + eptmp(:,:,:,3*(na-1)+1:3*na), 1) + +#endif ! ENDDO DEALLOCATE(epmatw) @@ -1204,6 +1223,7 @@ ! Call zgemm( 'n', 'n', nbnd * nbnd * nrr_k, nmodes, nmodes, cone, eptmp, & nbnd * nbnd * nrr_k, cuf, nmodes, czero, epmatf, nbnd * nbnd * nrr_k ) + ! CALL stop_clock('ephW2Bp') !