From d34b2124070dff1cde23d9ae28ab23fef60bfc5c Mon Sep 17 00:00:00 2001 From: Samuel Ponce Date: Sun, 12 Jan 2020 17:02:01 +0100 Subject: [PATCH] Support for integer kind 8 byte for mp_sum and mp_bcast (only for vectors now). --- EPW/src/ephwann_shuffle.f90 | 6 +- EPW/src/ephwann_shuffle_mem.f90 | 12 +- EPW/src/io_transport.f90 | 20 +- Modules/kind.f90 | 69 +++-- UtilXlib/mp.f90 | 352 ++++++++++++++----------- UtilXlib/mp_base.f90 | 452 +++++++++++++++++++++++--------- UtilXlib/util_param.f90 | 22 +- 7 files changed, 596 insertions(+), 337 deletions(-) diff --git a/EPW/src/ephwann_shuffle.f90 b/EPW/src/ephwann_shuffle.f90 index 70b0fcb04..9d49d5cc9 100644 --- a/EPW/src/ephwann_shuffle.f90 +++ b/EPW/src/ephwann_shuffle.f90 @@ -17,7 +17,7 @@ !! 2) Real-space Wannier to fine grid Bloch space interpolation !----------------------------------------------------------------------- ! - USE kinds, ONLY : DP, i4b + USE kinds, ONLY : DP, i4b, i8b USE pwcom, ONLY : nbnd, nks, nkstot, ef, nelec USE klist_epw, ONLY : et_loc, xk_loc, isk_dummy USE cell_base, ONLY : at, bg @@ -196,9 +196,9 @@ !! Selected q-points within the fsthick window INTEGER, PARAMETER :: nrwsx = 200 !! Maximum number of real-space Wigner-Seitz - INTEGER :: lrepmatw2_restart(npool) + INTEGER(KIND = i8b) :: lrepmatw2_restart(npool) !! To restart opening files - INTEGER :: lrepmatw5_restart(npool) + INTEGER(KIND = i8b) :: lrepmatw5_restart(npool) !! To restart opening files INTEGER :: ctype !! Calculation type: -1 = hole, +1 = electron and 0 = both. diff --git a/EPW/src/ephwann_shuffle_mem.f90 b/EPW/src/ephwann_shuffle_mem.f90 index 321f365ac..3bef5903f 100644 --- a/EPW/src/ephwann_shuffle_mem.f90 +++ b/EPW/src/ephwann_shuffle_mem.f90 @@ -21,7 +21,7 @@ !! !----------------------------------------------------------------------- ! - USE kinds, ONLY : DP, i4b + USE kinds, ONLY : DP, i4b, i8b USE pwcom, ONLY : nbnd, nks, nkstot, ef, nelec USE klist_epw, ONLY : et_loc, xk_loc, isk_dummy USE cell_base, ONLY : at, bg @@ -199,9 +199,9 @@ !! Selected q-points within the fsthick window INTEGER, PARAMETER :: nrwsx = 200 !! Maximum number of real-space Wigner-Seitz - INTEGER :: lrepmatw2_restart(npool) + INTEGER(KIND = i8b) :: lrepmatw2_restart(npool) !! To restart opening files - INTEGER :: lrepmatw5_restart(npool) + INTEGER(KIND = i8b) :: lrepmatw5_restart(npool) !! To restart opening files INTEGER :: ctype !! Calculation type: -1 = hole, +1 = electron and 0 = both. @@ -285,7 +285,7 @@ COMPLEX(KIND = DP), ALLOCATABLE :: vmefp(:, :, :) !! Phonon velocity ! - CALL start_clock ('ephwann') + CALL start_clock('ephwann') ! IF (nbndsub /= nbnd) WRITE(stdout, '(/,5x,a,i4)' ) 'Band disentanglement is used: nbndsub = ', nbndsub ! @@ -310,8 +310,8 @@ w2(:) = zero ! IF (lpolar) THEN - WRITE(stdout, '(/,5x,a)' ) 'Computes the analytic long-range interaction for polar materials [lpolar]' - WRITE(stdout, '(5x,a)' ) ' ' + WRITE(stdout, '(/,5x,a)') 'Computes the analytic long-range interaction for polar materials [lpolar]' + WRITE(stdout, '(5x,a)') ' ' ENDIF ! ! Determine Wigner-Seitz points diff --git a/EPW/src/io_transport.f90 b/EPW/src/io_transport.f90 index 5404a9ffc..c43d6be63 100644 --- a/EPW/src/io_transport.f90 +++ b/EPW/src/io_transport.f90 @@ -25,7 +25,7 @@ !! This subroutine computes the transition probability and the scattering rates. !! Only the elements larger than threshold are saved on file. !! - USE kinds, ONLY : DP, i4b + USE kinds, ONLY : DP, i4b, i8b USE cell_base, ONLY : omega USE io_global, ONLY : stdout USE phcom, ONLY : nmodes @@ -61,9 +61,9 @@ !! Q-point index INTEGER, INTENT(in) :: totq !! Total number of q-points in selecq - INTEGER, INTENT(inout) :: lrepmatw2_restart(npool) + INTEGER(KIND = i8b), INTENT(inout) :: lrepmatw2_restart(npool) !! Current position inside the file during writing - INTEGER, INTENT(inout) :: lrepmatw5_restart(npool) + INTEGER(KIND = i8b), INTENT(inout) :: lrepmatw5_restart(npool) !! Current position inside the file during writing (electron) #if defined(__MPI) INTEGER(KIND = MPI_OFFSET_KIND), INTENT(inout) :: ind_tot @@ -1179,7 +1179,7 @@ ! ! This routine opens all the files needed to save scattering rates for the IBTE. ! - USE kinds, ONLY : DP + USE kinds, ONLY : DP, i8b USE io_files, ONLY : prefix, create_directory, delete_if_present USE io_var, ONLY : iunepmat, iunsparseq, & iunsparseqcb, iunepmatcb, iunrestart @@ -1195,14 +1195,14 @@ ! IMPLICIT NONE ! - INTEGER, INTENT(inout) :: lrepmatw2_restart(npool) + INTEGER(KIND = i8b), INTENT(inout) :: lrepmatw2_restart(npool) !! To restart opening files - INTEGER, INTENT(inout) :: lrepmatw5_restart(npool) + INTEGER(KIND = i8b), INTENT(inout) :: lrepmatw5_restart(npool) !! To restart opening files #if defined(__MPI) - INTEGER (KIND = MPI_OFFSET_KIND), INTENT(inout) :: ind_tot + INTEGER(KIND = MPI_OFFSET_KIND), INTENT(inout) :: ind_tot !! Total number of component for valence band - INTEGER (KIND = MPI_OFFSET_KIND), INTENT(inout) :: ind_totcb + INTEGER(KIND = MPI_OFFSET_KIND), INTENT(inout) :: ind_totcb !! Total number of component for the conduction band #else INTEGER(KIND = 8), INTENT(inout) :: ind_tot @@ -1227,9 +1227,9 @@ !! Dummy INTEGER for reading INTEGER :: ipool !! Pool index - INTEGER (KIND = 8) :: position_byte + INTEGER(KIND = 8) :: position_byte !! Position in the file in byte - REAL (KIND = DP) :: dummy_real + REAL(KIND = DP) :: dummy_real !! Dummy variable for reading ! WRITE(my_pool_id_ch, "(I0)") my_pool_id diff --git a/Modules/kind.f90 b/Modules/kind.f90 index 229c7d03f..25be42f45 100644 --- a/Modules/kind.f90 +++ b/Modules/kind.f90 @@ -6,59 +6,56 @@ ! or http://www.gnu.org/copyleft/gpl.txt . ! !------------------------------------------------------------------------------! - MODULE kinds +MODULE kinds !------------------------------------------------------------------------------! - + ! + IMPLICIT NONE + SAVE + ! ... kind definitions + INTEGER, PARAMETER :: DP = selected_real_kind(14,200) + INTEGER, PARAMETER :: sgl = selected_real_kind(6,30) + INTEGER, PARAMETER :: i4b = selected_int_kind(9) + INTEGER, PARAMETER :: i8b = selected_int_kind(18) + PRIVATE + PUBLIC :: i4b, i8b, sgl, DP, print_kind_info + ! + !------------------------------------------------------------------------------! + CONTAINS + !------------------------------------------------------------------------------! + ! + !! Print information about the used data types. + ! + SUBROUTINE print_kind_info (stdout) + !--------------------------------------------------------------------------! + ! IMPLICIT NONE - SAVE -! ... kind definitions - INTEGER, PARAMETER :: DP = selected_real_kind(14,200) - INTEGER, PARAMETER :: sgl = selected_real_kind(6,30) - INTEGER, PARAMETER :: i4b = selected_int_kind(9) - PRIVATE - PUBLIC :: i4b, sgl, DP, print_kind_info -! -!------------------------------------------------------------------------------! -! - CONTAINS -! -!------------------------------------------------------------------------------! -! -!! Print information about the used data types. -! - SUBROUTINE print_kind_info (stdout) -! -!------------------------------------------------------------------------------! -! - IMPLICIT NONE - INTEGER, INTENT(IN) :: stdout -! - WRITE( stdout,'(/,T2,A)') 'DATA TYPE INFORMATION:' -! - WRITE( stdout,'(/,T2,A,T78,A,2(/,T2,A,T75,I6),3(/,T2,A,T67,E14.8))') & + INTEGER, INTENT(IN) :: stdout + ! + WRITE(stdout,'(/,T2,A)') 'DATA TYPE INFORMATION:' + ! + WRITE(stdout,'(/,T2,A,T78,A,2(/,T2,A,T75,I6),3(/,T2,A,T67,E14.8))') & 'REAL: Data type name:', 'DP', ' Kind value:', kind(0.0_DP), & ' Precision:', precision(0.0_DP), & ' Smallest nonnegligible quantity relative to 1:', & epsilon(0.0_DP), ' Smallest positive number:', tiny(0.0_DP), & ' Largest representable number:', huge(0.0_DP) - WRITE( stdout,'(/,T2,A,T78,A,2(/,T2,A,T75,I6),3(/,T2,A,T67,E14.8))') & + WRITE(stdout,'(/,T2,A,T78,A,2(/,T2,A,T75,I6),3(/,T2,A,T67,E14.8))') & ' Data type name:', 'sgl', ' Kind value:', kind(0.0_sgl), & ' Precision:', precision(0.0_sgl), & ' Smallest nonnegligible quantity relative to 1:', & epsilon(0.0_sgl), ' Smallest positive number:', tiny(0.0_sgl), & ' Largest representable number:', huge(0.0_sgl) - WRITE( stdout,'(/,T2,A,T72,A,4(/,T2,A,T61,I20))') & + WRITE(stdout,'(/,T2,A,T72,A,4(/,T2,A,T61,I20))') & 'INTEGER: Data type name:', '(default)', ' Kind value:', & kind(0), ' Bit size:', bit_size(0), & ' Largest representable number:', huge(0) - WRITE( stdout,'(/,T2,A,T72,A,/,T2,A,T75,I6,/)') 'LOGICAL: Data type name:', & + WRITE(stdout,'(/,T2,A,T72,A,/,T2,A,T75,I6,/)') 'LOGICAL: Data type name:', & '(default)', ' Kind value:', kind(.TRUE.) - WRITE( stdout,'(/,T2,A,T72,A,/,T2,A,T75,I6,/)') & + WRITE(stdout,'(/,T2,A,T72,A,/,T2,A,T75,I6,/)') & 'CHARACTER: Data type name:', '(default)', ' Kind value:', & kind('C') -! - END SUBROUTINE print_kind_info -! + ! + END SUBROUTINE print_kind_info !------------------------------------------------------------------------------! - END MODULE kinds +END MODULE kinds !------------------------------------------------------------------------------! diff --git a/UtilXlib/mp.f90 b/UtilXlib/mp.f90 index 31606ac19..00017efa9 100644 --- a/UtilXlib/mp.f90 +++ b/UtilXlib/mp.f90 @@ -9,159 +9,155 @@ ! initialization and stopping, broadcast, parallel sum, etc. ! !------------------------------------------------------------------------------! - MODULE mp +MODULE mp !------------------------------------------------------------------------------! - USE util_param, ONLY : DP, stdout - USE parallel_include + USE util_param, ONLY : DP, stdout, i8b + USE parallel_include #if defined(__CUDA) - USE cudafor, ONLY: cudamemcpy, cudamemcpy2d, & - & cudaMemcpyDeviceToDevice, & - & cudaDeviceSynchronize + USE cudafor, ONLY : cudamemcpy, cudamemcpy2d, & + & cudaMemcpyDeviceToDevice, & + & cudaDeviceSynchronize #endif - ! - IMPLICIT NONE - PRIVATE - - PUBLIC :: mp_start, mp_abort, mp_stop, mp_end, & - mp_bcast, mp_sum, mp_max, mp_min, mp_rank, mp_size, & - mp_gather, mp_alltoall, mp_get, mp_put, & - mp_barrier, mp_report, mp_group_free, & - mp_root_sum, mp_comm_free, mp_comm_create, mp_comm_group, & - mp_group_create, mp_comm_split, mp_set_displs, & - mp_circular_shift_left, mp_circular_shift_left_start, & - mp_get_comm_null, mp_get_comm_self, mp_count_nodes, & - mp_type_create_column_section, mp_type_free, & - mp_allgather, mp_waitall, mp_testall - -! - INTERFACE mp_bcast - MODULE PROCEDURE mp_bcast_i1, mp_bcast_r1, mp_bcast_c1, & - mp_bcast_z, mp_bcast_zv, & - mp_bcast_iv, mp_bcast_rv, mp_bcast_cv, mp_bcast_l, mp_bcast_rm, & - mp_bcast_cm, mp_bcast_im, mp_bcast_it, mp_bcast_i4d, mp_bcast_rt, mp_bcast_lv, & - mp_bcast_lm, mp_bcast_r4d, mp_bcast_r5d, mp_bcast_ct, mp_bcast_c4d,& - mp_bcast_c5d, mp_bcast_c6d + ! + IMPLICIT NONE + ! + PUBLIC :: mp_start, mp_abort, mp_stop, mp_end, & + mp_bcast, mp_sum, mp_max, mp_min, mp_rank, mp_size, & + mp_gather, mp_alltoall, mp_get, mp_put, & + mp_barrier, mp_report, mp_group_free, & + mp_root_sum, mp_comm_free, mp_comm_create, mp_comm_group, & + mp_group_create, mp_comm_split, mp_set_displs, & + mp_circular_shift_left, mp_circular_shift_left_start, & + mp_get_comm_null, mp_get_comm_self, mp_count_nodes, & + mp_type_create_column_section, mp_type_free, & + mp_allgather, mp_waitall, mp_testall + ! + INTERFACE mp_bcast + MODULE PROCEDURE mp_bcast_i1, mp_bcast_r1, mp_bcast_c1, & + mp_bcast_z, mp_bcast_zv, & + mp_bcast_iv, mp_bcast_i8v, mp_bcast_rv, mp_bcast_cv, mp_bcast_l, mp_bcast_rm, & + mp_bcast_cm, mp_bcast_im, mp_bcast_it, mp_bcast_i4d, mp_bcast_rt, mp_bcast_lv, & + mp_bcast_lm, mp_bcast_r4d, mp_bcast_r5d, mp_bcast_ct, mp_bcast_c4d,& + mp_bcast_c5d, mp_bcast_c6d #if defined(__CUDA) - MODULE PROCEDURE mp_bcast_i1_gpu, mp_bcast_r1_gpu, mp_bcast_c1_gpu, & - !mp_bcast_z_gpu, mp_bcast_zv_gpu, & - mp_bcast_iv_gpu, mp_bcast_rv_gpu, mp_bcast_cv_gpu, mp_bcast_l_gpu, mp_bcast_rm_gpu, & - mp_bcast_cm_gpu, mp_bcast_im_gpu, mp_bcast_it_gpu, mp_bcast_i4d_gpu, mp_bcast_rt_gpu, mp_bcast_lv_gpu, & - mp_bcast_lm_gpu, mp_bcast_r4d_gpu, mp_bcast_r5d_gpu, mp_bcast_ct_gpu, mp_bcast_c4d_gpu,& - mp_bcast_c5d_gpu, mp_bcast_c6d_gpu + MODULE PROCEDURE mp_bcast_i1_gpu, mp_bcast_r1_gpu, mp_bcast_c1_gpu, & + !mp_bcast_z_gpu, mp_bcast_zv_gpu, & + mp_bcast_iv_gpu, mp_bcast_rv_gpu, mp_bcast_cv_gpu, mp_bcast_l_gpu, mp_bcast_rm_gpu, & + mp_bcast_cm_gpu, mp_bcast_im_gpu, mp_bcast_it_gpu, mp_bcast_i4d_gpu, mp_bcast_rt_gpu, mp_bcast_lv_gpu, & + mp_bcast_lm_gpu, mp_bcast_r4d_gpu, mp_bcast_r5d_gpu, mp_bcast_ct_gpu, mp_bcast_c4d_gpu,& + mp_bcast_c5d_gpu, mp_bcast_c6d_gpu #endif - END INTERFACE - - INTERFACE mp_sum - MODULE PROCEDURE mp_sum_i1, mp_sum_iv, mp_sum_im, mp_sum_it, mp_sum_i4, mp_sum_i5, & - mp_sum_r1, mp_sum_rv, mp_sum_rm, mp_sum_rt, mp_sum_r4d, & - mp_sum_c1, mp_sum_cv, mp_sum_cm, mp_sum_ct, mp_sum_c4d, & - mp_sum_c5d, mp_sum_c6d, mp_sum_rmm, mp_sum_cmm, mp_sum_r5d, & - mp_sum_r6d + END INTERFACE + ! + INTERFACE mp_sum + MODULE PROCEDURE mp_sum_i1, mp_sum_iv, mp_sum_i8v, mp_sum_im, mp_sum_it, mp_sum_i4, mp_sum_i5, & + mp_sum_r1, mp_sum_rv, mp_sum_rm, mp_sum_rt, mp_sum_r4d, & + mp_sum_c1, mp_sum_cv, mp_sum_cm, mp_sum_ct, mp_sum_c4d, & + mp_sum_c5d, mp_sum_c6d, mp_sum_rmm, mp_sum_cmm, mp_sum_r5d, & + mp_sum_r6d #if defined(__CUDA) - MODULE PROCEDURE mp_sum_i1_gpu, mp_sum_iv_gpu, mp_sum_im_gpu, mp_sum_it_gpu, & - mp_sum_r1_gpu, mp_sum_rv_gpu, mp_sum_rm_gpu, mp_sum_rt_gpu, mp_sum_r4d_gpu, & - mp_sum_c1_gpu, mp_sum_cv_gpu, mp_sum_cm_gpu, mp_sum_ct_gpu, mp_sum_c4d_gpu, & - mp_sum_c5d_gpu, mp_sum_c6d_gpu, mp_sum_rmm_gpu, mp_sum_cmm_gpu, mp_sum_r5d_gpu, & - mp_sum_r6d_gpu + MODULE PROCEDURE mp_sum_i1_gpu, mp_sum_iv_gpu, mp_sum_im_gpu, mp_sum_it_gpu, & + mp_sum_r1_gpu, mp_sum_rv_gpu, mp_sum_rm_gpu, mp_sum_rt_gpu, mp_sum_r4d_gpu, & + mp_sum_c1_gpu, mp_sum_cv_gpu, mp_sum_cm_gpu, mp_sum_ct_gpu, mp_sum_c4d_gpu, & + mp_sum_c5d_gpu, mp_sum_c6d_gpu, mp_sum_rmm_gpu, mp_sum_cmm_gpu, mp_sum_r5d_gpu, & + mp_sum_r6d_gpu #endif - END INTERFACE - - INTERFACE mp_root_sum - MODULE PROCEDURE mp_root_sum_rm, mp_root_sum_cm + END INTERFACE + ! + INTERFACE mp_root_sum + MODULE PROCEDURE mp_root_sum_rm, mp_root_sum_cm #if defined(__CUDA) - MODULE PROCEDURE mp_root_sum_rm_gpu, mp_root_sum_cm_gpu + MODULE PROCEDURE mp_root_sum_rm_gpu, mp_root_sum_cm_gpu #endif - END INTERFACE - - INTERFACE mp_get - MODULE PROCEDURE mp_get_r1, mp_get_rv, mp_get_cv, mp_get_i1, mp_get_iv, & - mp_get_rm, mp_get_cm + END INTERFACE + ! + INTERFACE mp_get + MODULE PROCEDURE mp_get_r1, mp_get_rv, mp_get_cv, mp_get_i1, mp_get_iv, mp_get_rm, mp_get_cm #if defined(__CUDA) - MODULE PROCEDURE mp_get_r1_gpu, mp_get_rv_gpu, mp_get_cv_gpu, mp_get_i1_gpu, mp_get_iv_gpu, & - mp_get_rm_gpu, mp_get_cm_gpu + MODULE PROCEDURE mp_get_r1_gpu, mp_get_rv_gpu, mp_get_cv_gpu, mp_get_i1_gpu, mp_get_iv_gpu, & + mp_get_rm_gpu, mp_get_cm_gpu #endif - END INTERFACE - - INTERFACE mp_put - MODULE PROCEDURE mp_put_rv, mp_put_cv, mp_put_i1, mp_put_iv, & - mp_put_rm + END INTERFACE + ! + INTERFACE mp_put + MODULE PROCEDURE mp_put_rv, mp_put_cv, mp_put_i1, mp_put_iv, & + mp_put_rm #if defined(__CUDA) - MODULE PROCEDURE mp_put_rv_gpu, mp_put_cv_gpu, mp_put_i1_gpu, mp_put_iv_gpu, & - mp_put_rm_gpu + MODULE PROCEDURE mp_put_rv_gpu, mp_put_cv_gpu, mp_put_i1_gpu, mp_put_iv_gpu, & + mp_put_rm_gpu #endif - END INTERFACE - - INTERFACE mp_max - MODULE PROCEDURE mp_max_i, mp_max_r, mp_max_rv, mp_max_iv + END INTERFACE + ! + INTERFACE mp_max + MODULE PROCEDURE mp_max_i, mp_max_r, mp_max_rv, mp_max_iv #if defined(__CUDA) - MODULE PROCEDURE mp_max_i_gpu, mp_max_r_gpu, mp_max_rv_gpu, mp_max_iv_gpu + MODULE PROCEDURE mp_max_i_gpu, mp_max_r_gpu, mp_max_rv_gpu, mp_max_iv_gpu #endif - END INTERFACE - - INTERFACE mp_min - MODULE PROCEDURE mp_min_i, mp_min_r, mp_min_rv, mp_min_iv + END INTERFACE + ! + INTERFACE mp_min + MODULE PROCEDURE mp_min_i, mp_min_r, mp_min_rv, mp_min_iv #if defined(__CUDA) - MODULE PROCEDURE mp_min_i_gpu, mp_min_r_gpu, mp_min_rv_gpu, mp_min_iv_gpu + MODULE PROCEDURE mp_min_i_gpu, mp_min_r_gpu, mp_min_rv_gpu, mp_min_iv_gpu #endif - END INTERFACE - - INTERFACE mp_gather - MODULE PROCEDURE mp_gather_i1, mp_gather_iv, mp_gatherv_rv, mp_gatherv_iv, & - mp_gatherv_rm, mp_gatherv_im, mp_gatherv_cv, & - mp_gatherv_inplace_cplx_array + END INTERFACE + ! + INTERFACE mp_gather + MODULE PROCEDURE mp_gather_i1, mp_gather_iv, mp_gatherv_rv, mp_gatherv_iv, & + mp_gatherv_rm, mp_gatherv_im, mp_gatherv_cv, & + mp_gatherv_inplace_cplx_array #if defined(__CUDA) - MODULE PROCEDURE mp_gather_i1_gpu, mp_gather_iv_gpu, mp_gatherv_rv_gpu, mp_gatherv_iv_gpu, & - mp_gatherv_rm_gpu, mp_gatherv_im_gpu, mp_gatherv_cv_gpu, mp_gatherv_inplace_cplx_array_gpu + MODULE PROCEDURE mp_gather_i1_gpu, mp_gather_iv_gpu, mp_gatherv_rv_gpu, mp_gatherv_iv_gpu, & + mp_gatherv_rm_gpu, mp_gatherv_im_gpu, mp_gatherv_cv_gpu, mp_gatherv_inplace_cplx_array_gpu #endif - END INTERFACE - - INTERFACE mp_allgather - MODULE PROCEDURE mp_allgatherv_inplace_cplx_array + END INTERFACE + ! + INTERFACE mp_allgather + MODULE PROCEDURE mp_allgatherv_inplace_cplx_array #if defined(__CUDA) - MODULE PROCEDURE mp_allgatherv_inplace_cplx_array_gpu + MODULE PROCEDURE mp_allgatherv_inplace_cplx_array_gpu #endif - END INTERFACE - - INTERFACE mp_alltoall - MODULE PROCEDURE mp_alltoall_c3d, mp_alltoall_i3d + END INTERFACE + ! + INTERFACE mp_alltoall + MODULE PROCEDURE mp_alltoall_c3d, mp_alltoall_i3d #if defined(__CUDA) - MODULE PROCEDURE mp_alltoall_c3d_gpu, mp_alltoall_i3d_gpu + MODULE PROCEDURE mp_alltoall_c3d_gpu, mp_alltoall_i3d_gpu #endif - END INTERFACE - - INTERFACE mp_circular_shift_left - MODULE PROCEDURE mp_circular_shift_left_i0, & - mp_circular_shift_left_i1, & - mp_circular_shift_left_i2, & - mp_circular_shift_left_r2d, & - mp_circular_shift_left_c2d + END INTERFACE + ! + INTERFACE mp_circular_shift_left + MODULE PROCEDURE mp_circular_shift_left_i0, & + mp_circular_shift_left_i1, & + mp_circular_shift_left_i2, & + mp_circular_shift_left_r2d, & + mp_circular_shift_left_c2d #if defined(__CUDA) - MODULE PROCEDURE mp_circular_shift_left_i0_gpu, & - mp_circular_shift_left_i1_gpu, & - mp_circular_shift_left_i2_gpu, & - mp_circular_shift_left_r2d_gpu, & - mp_circular_shift_left_c2d_gpu + MODULE PROCEDURE mp_circular_shift_left_i0_gpu, & + mp_circular_shift_left_i1_gpu, & + mp_circular_shift_left_i2_gpu, & + mp_circular_shift_left_r2d_gpu, & + mp_circular_shift_left_c2d_gpu #endif - END INTERFACE - - INTERFACE mp_circular_shift_left_start - MODULE PROCEDURE mp_circular_shift_left_start_i0, & - mp_circular_shift_left_start_i1, & - mp_circular_shift_left_start_i2, & - mp_circular_shift_left_start_r2d, & - mp_circular_shift_left_start_c2d - END INTERFACE - - INTERFACE mp_type_create_column_section - MODULE PROCEDURE mp_type_create_cplx_column_section + END INTERFACE + ! + INTERFACE mp_circular_shift_left_start + MODULE PROCEDURE mp_circular_shift_left_start_i0, & + mp_circular_shift_left_start_i1, & + mp_circular_shift_left_start_i2, & + mp_circular_shift_left_start_r2d, & + mp_circular_shift_left_start_c2d + END INTERFACE + ! + INTERFACE mp_type_create_column_section + MODULE PROCEDURE mp_type_create_cplx_column_section #if defined(__CUDA) - MODULE PROCEDURE mp_type_create_cplx_column_section_gpu + MODULE PROCEDURE mp_type_create_cplx_column_section_gpu #endif - END INTERFACE - + END INTERFACE !------------------------------------------------------------------------------! ! - CONTAINS + CONTAINS ! !------------------------------------------------------------------------------! ! @@ -433,22 +429,50 @@ CALL bcast_integer( msg, msglen, source, group ) #endif END SUBROUTINE mp_bcast_i1 -! -!------------------------------------------------------------------------------! - SUBROUTINE mp_bcast_iv(msg,source,gid) - IMPLICIT NONE - INTEGER :: msg(:) - INTEGER, INTENT(IN) :: source - INTEGER, INTENT(IN) :: gid + ! + !------------------------------------------------------------------------------! + SUBROUTINE mp_bcast_iv(msg, source, gid) + !------------------------------------------------------------------------------! + !! + !! Bcast an integer vector + !! + IMPLICIT NONE + ! + INTEGER :: msg(:) + INTEGER, INTENT(in) :: source + INTEGER, INTENT(in) :: gid #if defined(__MPI) - INTEGER :: msglen - msglen = size(msg) - CALL bcast_integer( msg, msglen, source, gid ) + INTEGER :: msglen + msglen = SIZE(msg) + CALL bcast_integer(msg, msglen, source, gid) #endif + !------------------------------------------------------------------------------! END SUBROUTINE mp_bcast_iv -! -!------------------------------------------------------------------------------! - SUBROUTINE mp_bcast_im( msg, source, gid ) + !------------------------------------------------------------------------------! + ! + !------------------------------------------------------------------------------! + SUBROUTINE mp_bcast_i8v(msg, source, gid) + !------------------------------------------------------------------------------! + !! + !! Bcast an integer vector of kind i8b. + !! + IMPLICIT NONE + ! + INTEGER(KIND = i8b) :: msg(:) + INTEGER, INTENT(in) :: source + INTEGER, INTENT(in) :: gid +#if defined(__MPI) + INTEGER :: msglen + msglen = SIZE(msg) + CALL bcast_integer8(msg, msglen, source, gid) +#endif + !------------------------------------------------------------------------------! + END SUBROUTINE mp_bcast_i8v + !------------------------------------------------------------------------------! + ! + !------------------------------------------------------------------------------! + SUBROUTINE mp_bcast_im(msg, source, gid) + !------------------------------------------------------------------------------! IMPLICIT NONE INTEGER :: msg(:,:) INTEGER, INTENT(IN) :: source @@ -456,7 +480,7 @@ #if defined(__MPI) INTEGER :: msglen msglen = size(msg) - CALL bcast_integer( msg, msglen, source, gid ) + CALL bcast_integer(msg, msglen, source, gid) #endif END SUBROUTINE mp_bcast_im ! @@ -1400,22 +1424,48 @@ CALL reduce_base_integer( msglen, msg, gid, -1 ) #endif END SUBROUTINE mp_sum_i1 -! -!------------------------------------------------------------------------------! - SUBROUTINE mp_sum_iv(msg,gid) - IMPLICIT NONE - INTEGER, INTENT (INOUT) :: msg(:) - INTEGER, INTENT(IN) :: gid + ! + !------------------------------------------------------------------------------! + SUBROUTINE mp_sum_iv(msg, gid) + !------------------------------------------------------------------------------! + !! + !! MPI sum an integer vector from all cores and bcast the result to all. + !! + IMPLICIT NONE + ! + INTEGER, INTENT(inout) :: msg(:) + INTEGER, INTENT(in) :: gid #if defined(__MPI) - INTEGER :: msglen - msglen = size(msg) - CALL reduce_base_integer( msglen, msg, gid, -1 ) + INTEGER :: msglen + msglen = SIZE(msg) + CALL reduce_base_integer(msglen, msg, gid, -1) #endif + !------------------------------------------------------------------------------! END SUBROUTINE mp_sum_iv -! -!------------------------------------------------------------------------------! - + !------------------------------------------------------------------------------! + ! + !------------------------------------------------------------------------------! + SUBROUTINE mp_sum_i8v(msg, gid) + !------------------------------------------------------------------------------! + !! + !! MPI sum an integer vector from all cores and bcast the result to all. + !! + IMPLICIT NONE + ! + INTEGER(KIND = i8b), INTENT(inout) :: msg(:) + INTEGER, INTENT(in) :: gid +#if defined(__MPI) + INTEGER :: msglen + msglen = SIZE(msg) + CALL reduce_base_integer8(msglen, msg, gid, -1) +#endif + !------------------------------------------------------------------------------! + END SUBROUTINE mp_sum_i8v + !------------------------------------------------------------------------------! + ! + !------------------------------------------------------------------------------! SUBROUTINE mp_sum_im(msg,gid) + !------------------------------------------------------------------------------! IMPLICIT NONE INTEGER, INTENT (INOUT) :: msg(:,:) INTEGER, INTENT(IN) :: gid @@ -5906,7 +5956,7 @@ END SUBROUTINE mp_type_free #endif !------------------------------------------------------------------------------! - END MODULE mp +END MODULE mp !------------------------------------------------------------------------------! ! ! Script to generate stop messages: diff --git a/UtilXlib/mp_base.f90 b/UtilXlib/mp_base.f90 index 209b735a8..30840d6ae 100644 --- a/UtilXlib/mp_base.f90 +++ b/UtilXlib/mp_base.f90 @@ -115,53 +115,110 @@ END SUBROUTINE mp_synchronize RETURN END SUBROUTINE bcast_real - - SUBROUTINE bcast_integer( array, n, root, gid ) - USE parallel_include - IMPLICIT NONE - INTEGER, INTENT(IN) :: n, root, gid - INTEGER :: array( n ) + !------------------------------------------------------------------------------! + SUBROUTINE bcast_integer(array, n, root, gid) + !------------------------------------------------------------------------------! + !! + !! Broadcast integers + !! + USE parallel_include + ! + IMPLICIT NONE + INTEGER, INTENT(in) :: n, root, gid + INTEGER :: array(n) #if defined __MPI - INTEGER :: msgsiz_max = __MSGSIZ_MAX - INTEGER :: nblk, blksiz, iblk, istart, ierr - + INTEGER :: msgsiz_max = __MSGSIZ_MAX + INTEGER :: nblk, blksiz, iblk, istart, ierr #if defined __TRACE - write(*,*) 'BCAST_INTEGER IN' + WRITE(*, *) 'BCAST_INTEGER IN' #endif - - IF( n <= 0 ) GO TO 1 - + IF(n <= 0) GOTO 1 + ! #if defined __USE_BARRIER - CALL mp_synchronize( gid ) + CALL mp_synchronize(gid) #endif - - IF( n <= msgsiz_max ) THEN - CALL MPI_BCAST( array, n, MPI_INTEGER, root, gid, ierr ) - IF( ierr /= 0 ) CALL errore( ' bcast_integer ', ' error in mpi_bcast 1 ', ierr ) - ELSE - nblk = n / msgsiz_max - blksiz = msgsiz_max - DO iblk = 1, nblk - istart = (iblk-1)*msgsiz_max + 1 - CALL MPI_BCAST( array( istart ), blksiz, MPI_INTEGER, root, gid, ierr ) - IF( ierr /= 0 ) CALL errore( ' bcast_integer ', ' error in mpi_bcast 2 ', ierr ) - END DO - blksiz = MOD( n, msgsiz_max ) - IF( blksiz > 0 ) THEN - istart = nblk * msgsiz_max + 1 - CALL MPI_BCAST( array( istart ), blksiz, MPI_INTEGER, root, gid, ierr ) - IF( ierr /= 0 ) CALL errore( ' bcast_integer ', ' error in mpi_bcast 3 ', ierr ) - END IF - END IF -1 CONTINUE + ! + IF(n <= msgsiz_max) THEN + CALL MPI_BCAST(array, n, MPI_INTEGER, root, gid, ierr) + IF(ierr /= 0) CALL errore(' bcast_integer ', ' error in mpi_bcast 1 ', ierr) + ELSE + nblk = n / msgsiz_max + blksiz = msgsiz_max + DO iblk = 1, nblk + istart = (iblk - 1) * msgsiz_max + 1 + CALL MPI_BCAST(array(istart), blksiz, MPI_INTEGER, root, gid, ierr ) + IF(ierr /= 0) CALL errore(' bcast_integer ', ' error in mpi_bcast 2 ', ierr) + ENDDO + blksiz = MOD( n, msgsiz_max ) + IF(blksiz > 0) THEN + istart = nblk * msgsiz_max + 1 + CALL MPI_BCAST(array( istart ), blksiz, MPI_INTEGER, root, gid, ierr) + IF(ierr /= 0) CALL errore(' bcast_integer ', ' error in mpi_bcast 3 ', ierr) + ENDIF + ENDIF +1 CONTINUE #if defined __TRACE - write(*,*) 'BCAST_INTEGER OUT' + WRITE(*, *) 'BCAST_INTEGER OUT' #endif #endif - RETURN + RETURN + !------------------------------------------------------------------------------! END SUBROUTINE bcast_integer - - + !------------------------------------------------------------------------------! + ! + !------------------------------------------------------------------------------! + SUBROUTINE bcast_integer8(array, n, root, gid) + !------------------------------------------------------------------------------! + !! + !! Broadcast integers + !! + USE util_param, ONLY : i8b + USE parallel_include + ! + IMPLICIT NONE + INTEGER, INTENT(in) :: n, root, gid + INTEGER(KIND = i8b) :: array(n) +#if defined __MPI + INTEGER :: msgsiz_max = __MSGSIZ_MAX + INTEGER :: nblk, blksiz, iblk, istart, ierr +#if defined __TRACE + WRITE(*, *) 'BCAST_INTEGER IN' +#endif + IF(n <= 0) GOTO 1 + ! +#if defined __USE_BARRIER + CALL mp_synchronize(gid) +#endif + ! + IF(n <= msgsiz_max) THEN + CALL MPI_BCAST(array, n, MPI_INTEGER8, root, gid, ierr) + IF(ierr /= 0) CALL errore(' bcast_integer8 ', ' error in mpi_bcast 1 ', ierr) + ELSE + nblk = n / msgsiz_max + blksiz = msgsiz_max + DO iblk = 1, nblk + istart = (iblk - 1) * msgsiz_max + 1 + CALL MPI_BCAST(array(istart), blksiz, MPI_INTEGER8, root, gid, ierr ) + IF(ierr /= 0) CALL errore(' bcast_integer8 ', ' error in mpi_bcast 2 ', ierr) + ENDDO + blksiz = MOD( n, msgsiz_max ) + IF(blksiz > 0) THEN + istart = nblk * msgsiz_max + 1 + CALL MPI_BCAST(array( istart ), blksiz, MPI_INTEGER8, root, gid, ierr) + IF(ierr /= 0) CALL errore(' bcast_integer8 ', ' error in mpi_bcast 3 ', ierr) + ENDIF + ENDIF +1 CONTINUE +#if defined __TRACE + WRITE(*, *) 'BCAST_INTEGER OUT' +#endif +#endif + RETURN + !------------------------------------------------------------------------------! + END SUBROUTINE bcast_integer8 + !------------------------------------------------------------------------------! + ! + !------------------------------------------------------------------------------! SUBROUTINE bcast_logical( array, n, root, gid ) USE parallel_include IMPLICIT NONE @@ -369,77 +426,128 @@ END SUBROUTINE reduce_base_real ! ! #if defined (__USE_INPLACE_MPI) -! -!---------------------------------------------------------------------------- -SUBROUTINE reduce_base_integer( dim, ps, comm, root ) + ! !---------------------------------------------------------------------------- - ! - ! ... sums a distributed variable ps(dim) over the processors. - ! ... This version uses a fixed-length buffer of appropriate (?) dim - ! - USE util_param, ONLY : DP + SUBROUTINE reduce_base_integer(dim, ps, comm, root) + !---------------------------------------------------------------------------- + !! + !! Sums a distributed variable ps(dim) over the processors. + !! This version uses a fixed-length buffer of appropriate (?) dim + !! + USE util_param, ONLY : DP USE parallel_include ! IMPLICIT NONE ! - INTEGER, INTENT(IN) :: dim - INTEGER :: ps(dim) - INTEGER, INTENT(IN) :: comm ! communicator - INTEGER, INTENT(IN) :: root ! if root < 0 perform a reduction to all procs - ! if root >= 0 perform a reduce only to root proc. + INTEGER, INTENT(in) :: dim + INTEGER, INTENT(inout) :: ps(dim) + INTEGER, INTENT(in) :: comm ! communicator + INTEGER, INTENT(in) :: root ! if root < 0 perform a reduction to all procs + ! if root >= 0 perform a reduce only to root proc. ! #if defined (__MPI) ! INTEGER :: info ! #if defined __TRACE - write(*,*) 'reduce_base_integer IN' + WRITE(*, *) 'reduce_base_integer IN' #endif ! ! ... synchronize processes ! #if defined __USE_BARRIER - CALL mp_synchronize( comm ) + CALL mp_synchronize(comm) #endif ! - IF( root >= 0 ) THEN - CALL MPI_REDUCE( MPI_IN_PLACE, ps, dim, MPI_INTEGER, MPI_SUM, root, comm, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_reduce 1', info ) + IF(root >= 0) THEN + CALL MPI_REDUCE(MPI_IN_PLACE, ps, dim, MPI_INTEGER, MPI_SUM, root, comm, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_reduce 1', info) ELSE - CALL MPI_ALLREDUCE( MPI_IN_PLACE, ps, dim, MPI_INTEGER, MPI_SUM, comm, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_allreduce 1', info ) - END IF + CALL MPI_ALLREDUCE(MPI_IN_PLACE, ps, dim, MPI_INTEGER, MPI_SUM, comm, info) + IF(info /= 0 ) CALL errore('reduce_base_integer', 'error in mpi_allreduce 1', info) + ENDIF ! #if defined __TRACE - write(*,*) 'reduce_base_integer OUT' + WRITE(*, *) 'reduce_base_integer OUT' #endif ! #endif ! RETURN - ! -END SUBROUTINE reduce_base_integer -! -#else -! -!---------------------------------------------------------------------------- -SUBROUTINE reduce_base_integer( dim, ps, comm, root ) + !---------------------------------------------------------------------------- + END SUBROUTINE reduce_base_integer !---------------------------------------------------------------------------- ! - ! ... sums a distributed variable ps(dim) over the processors. - ! ... This version uses a fixed-length buffer of appropriate (?) dim + !---------------------------------------------------------------------------- + SUBROUTINE reduce_base_integer8(dim, ps, comm, root) + !---------------------------------------------------------------------------- + !! + !! Sums a distributed variable ps(dim) over the processors. + !! This version uses a fixed-length buffer of appropriate (?) dim + !! + USE util_param, ONLY : DP, i8b + USE parallel_include ! - USE util_param, ONLY : DP + IMPLICIT NONE + ! + INTEGER, INTENT(in) :: dim + INTEGER(KIND = i8b), INTENT(inout) :: ps(dim) + INTEGER, INTENT(in) :: comm ! communicator + INTEGER, INTENT(in) :: root ! if root < 0 perform a reduction to all procs + ! if root >= 0 perform a reduce only to root proc. + ! +#if defined (__MPI) + ! + INTEGER :: info + ! +#if defined __TRACE + WRITE(*, *) 'reduce_base_integer IN' +#endif + ! + ! ... synchronize processes + ! +#if defined __USE_BARRIER + CALL mp_synchronize(comm) +#endif + ! + IF(root >= 0) THEN + CALL MPI_REDUCE(MPI_IN_PLACE, ps, dim, MPI_INTEGER8, MPI_SUM, root, comm, info) + IF(info /= 0) CALL errore('reduce_base_integer8', 'error in mpi_reduce 1', info) + ELSE + CALL MPI_ALLREDUCE(MPI_IN_PLACE, ps, dim, MPI_INTEGER8, MPI_SUM, comm, info) + IF(info /= 0 ) CALL errore('reduce_base_integer8', 'error in mpi_allreduce 1', info) + ENDIF + ! +#if defined __TRACE + WRITE(*, *) 'reduce_base_integer OUT' +#endif + ! +#endif + ! + RETURN + !---------------------------------------------------------------------------- + END SUBROUTINE reduce_base_integer8 + !---------------------------------------------------------------------------- +#else + ! + !---------------------------------------------------------------------------- + SUBROUTINE reduce_base_integer(dim, ps, comm, root) + !---------------------------------------------------------------------------- + !! + !! Sums a distributed variable ps(dim) over the processors. + !! This version uses a fixed-length buffer of appropriate (?) dim + !! + USE util_param, ONLY : DP USE data_buffer, ONLY : buff => mp_buff_i USE parallel_include ! IMPLICIT NONE ! - INTEGER, INTENT(IN) :: dim - INTEGER :: ps(dim) - INTEGER, INTENT(IN) :: comm ! communicator - INTEGER, INTENT(IN) :: root ! if root < 0 perform a reduction to all procs - ! if root >= 0 perform a reduce only to root proc. + INTEGER, INTENT(in) :: dim + INTEGER, INTENT(inout) :: ps(dim) + INTEGER, INTENT(in) :: comm ! communicator + INTEGER, INTENT(in) :: root ! if root < 0 perform a reduction to all procs + ! if root >= 0 perform a reduce only to root proc. ! #if defined (__MPI) ! @@ -447,81 +555,177 @@ SUBROUTINE reduce_base_integer( dim, ps, comm, root ) INTEGER, PARAMETER :: maxb = __MSGSIZ_MAX ! #if defined __TRACE - write(*,*) 'reduce_base_integer IN' + WRITE(*, *) 'reduce_base_integer IN' #endif ! - CALL mpi_comm_size( comm, nproc, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_comm_size', info ) - - CALL mpi_comm_rank( comm, myid, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_comm_rank', info ) + CALL mpi_comm_size(comm, nproc, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_comm_size', info) + ! + CALL mpi_comm_rank(comm, myid, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_comm_rank', info) ! - IF ( dim <= 0 .OR. nproc <= 1 ) GO TO 1 ! go to the end of the subroutine + IF (dim <= 0 .OR. nproc <= 1) GOTO 1 ! go to the end of the subroutine ! ! ... synchronize processes ! #if defined __USE_BARRIER - CALL mp_synchronize( comm ) + CALL mp_synchronize(comm) #endif ! nbuf = dim / maxb ! DO n = 1, nbuf - ! - IF( root >= 0 ) THEN - CALL MPI_REDUCE( ps(1+(n-1)*maxb), buff, maxb, MPI_INTEGER, MPI_SUM, root, comm, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_reduce 1', info ) - ELSE - CALL MPI_ALLREDUCE( ps(1+(n-1)*maxb), buff, maxb, MPI_INTEGER, MPI_SUM, comm, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_allreduce 1', info ) - END IF - ! - IF( root < 0 ) THEN - ps((1+(n-1)*maxb):(n*maxb)) = buff(1:maxb) - ELSE IF( root == myid ) THEN - ps((1+(n-1)*maxb):(n*maxb)) = buff(1:maxb) - END IF - ! - END DO + ! + IF (root >= 0) THEN + CALL MPI_REDUCE(ps(1 + (n - 1) * maxb), buff, maxb, MPI_INTEGER, MPI_SUM, root, comm, info) + IF (info /= 0 ) CALL errore('reduce_base_integer', 'error in mpi_reduce 1', info) + ELSE + CALL MPI_ALLREDUCE(ps(1 + (n - 1) * maxb), buff, maxb, MPI_INTEGER, MPI_SUM, comm, info) + IF (info /= 0) CALL errore('reduce_base_integer', 'error in mpi_allreduce 1', info) + ENDIF + ! + IF (root < 0 ) THEN + ps((1 + (n - 1) * maxb):(n * maxb)) = buff(1:maxb) + ELSEIF( root == myid) THEN + ps((1 + (n - 1) * maxb):(n * maxb)) = buff(1:maxb) + ENDIF + ! + ENDDO ! ! ... possible remaining elements < maxb ! - IF ( ( dim - nbuf * maxb ) > 0 ) THEN - ! - IF( root >= 0 ) THEN - CALL MPI_REDUCE( ps(1+nbuf*maxb), buff, (dim-nbuf*maxb), MPI_INTEGER, MPI_SUM, root, comm, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_reduce 2', info ) - ELSE - CALL MPI_ALLREDUCE( ps(1+nbuf*maxb), buff, (dim-nbuf*maxb), MPI_INTEGER, MPI_SUM, comm, info ) - IF( info /= 0 ) CALL errore( 'reduce_base_integer', 'error in mpi_allreduce 2', info ) - END IF - ! - IF( root < 0 ) THEN - ps((1+nbuf*maxb):dim) = buff(1:(dim-nbuf*maxb)) - ELSE IF( root == myid ) THEN - ps((1+nbuf*maxb):dim) = buff(1:(dim-nbuf*maxb)) - END IF - ! - END IF + IF ((dim - nbuf * maxb) > 0) THEN + ! + IF (root >= 0) THEN + CALL MPI_REDUCE(ps(1 + nbuf * maxb), buff, (dim - nbuf * maxb), MPI_INTEGER, MPI_SUM, root, comm, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_reduce 2', info) + ELSE + CALL MPI_ALLREDUCE(ps(1 + nbuf * maxb), buff, (dim - nbuf * maxb), MPI_INTEGER, MPI_SUM, comm, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_allreduce 2', info) + ENDIF + ! + IF(root < 0) THEN + ps((1 + nbuf * maxb):dim) = buff(1:(dim - nbuf * maxb)) + ELSEIF(root == myid) THEN + ps((1 + nbuf * maxb):dim) = buff(1:(dim - nbuf * maxb)) + ENDIF + ! + ENDIF ! 1 CONTINUE ! #if defined __TRACE - write(*,*) 'reduce_base_integer OUT' + WRITE(*, *) 'reduce_base_integer OUT' #endif ! #endif ! RETURN + !---------------------------------------------------------------------------- + END SUBROUTINE reduce_base_integer + !---------------------------------------------------------------------------- ! -END SUBROUTINE reduce_base_integer -! + !---------------------------------------------------------------------------- + SUBROUTINE reduce_base_integer8(dim, ps, comm, root) + !---------------------------------------------------------------------------- + !! + !! Sums a distributed variable ps(dim) over the processors. + !! This version uses a fixed-length buffer of appropriate (?) dim + !! + USE util_param, ONLY : DP, i8b + USE data_buffer, ONLY : buff => mp_buff_i + USE parallel_include + ! + IMPLICIT NONE + ! + INTEGER, INTENT(in) :: dim + INTEGER(KIND = i8b), INTENT(inout) :: ps(dim) + INTEGER, INTENT(in) :: comm ! communicator + INTEGER, INTENT(in) :: root ! if root < 0 perform a reduction to all procs + ! if root >= 0 perform a reduce only to root proc. + ! +#if defined (__MPI) + ! + INTEGER :: info, n, nbuf, nproc, myid + INTEGER, PARAMETER :: maxb = __MSGSIZ_MAX + ! +#if defined __TRACE + WRITE(*, *) 'reduce_base_integer8 IN' #endif -! -! ... "reduce"-like subroutines -! -!---------------------------------------------------------------------------- -SUBROUTINE reduce_base_real_to( dim, ps, psout, comm, root ) + ! + CALL mpi_comm_size(comm, nproc, info) + IF(info /= 0) CALL errore('reduce_base_integer8', 'error in mpi_comm_size', info) + ! + CALL mpi_comm_rank(comm, myid, info) + IF(info /= 0) CALL errore('reduce_base_integer8', 'error in mpi_comm_rank', info) + ! + IF (dim <= 0 .OR. nproc <= 1) GOTO 1 ! go to the end of the subroutine + ! + ! ... synchronize processes + ! +#if defined __USE_BARRIER + CALL mp_synchronize(comm) +#endif + ! + nbuf = dim / maxb + ! + DO n = 1, nbuf + ! + IF (root >= 0) THEN + CALL MPI_REDUCE(ps(1 + (n - 1) * maxb), buff, maxb, MPI_INTEGER8, MPI_SUM, root, comm, info) + IF (info /= 0 ) CALL errore('reduce_base_integer8', 'error in mpi_reduce 1', info) + ELSE + CALL MPI_ALLREDUCE(ps(1 + (n - 1) * maxb), buff, maxb, MPI_INTEGER8, MPI_SUM, comm, info) + IF (info /= 0) CALL errore('reduce_base_integer8', 'error in mpi_allreduce 1', info) + ENDIF + ! + IF (root < 0 ) THEN + ps((1 + (n - 1) * maxb):(n * maxb)) = buff(1:maxb) + ELSEIF( root == myid) THEN + ps((1 + (n - 1) * maxb):(n * maxb)) = buff(1:maxb) + ENDIF + ! + ENDDO + ! + ! ... possible remaining elements < maxb + ! + IF ((dim - nbuf * maxb) > 0) THEN + ! + IF (root >= 0) THEN + CALL MPI_REDUCE(ps(1 + nbuf * maxb), buff, (dim - nbuf * maxb), MPI_INTEGER8, MPI_SUM, root, comm, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_reduce 2', info) + ELSE + CALL MPI_ALLREDUCE(ps(1 + nbuf * maxb), buff, (dim - nbuf * maxb), MPI_INTEGER8, MPI_SUM, comm, info) + IF(info /= 0) CALL errore('reduce_base_integer', 'error in mpi_allreduce 2', info) + ENDIF + ! + IF(root < 0) THEN + ps((1 + nbuf * maxb):dim) = buff(1:(dim - nbuf * maxb)) + ELSEIF(root == myid) THEN + ps((1 + nbuf * maxb):dim) = buff(1:(dim - nbuf * maxb)) + ENDIF + ! + ENDIF + ! +1 CONTINUE + ! +#if defined __TRACE + WRITE(*, *) 'reduce_base_integer8 OUT' +#endif + ! +#endif + ! + RETURN + !---------------------------------------------------------------------------- + END SUBROUTINE reduce_base_integer8 + !---------------------------------------------------------------------------- + ! +#endif + ! + ! ... "reduce"-like subroutines + ! + !---------------------------------------------------------------------------- + SUBROUTINE reduce_base_real_to( dim, ps, psout, comm, root ) !---------------------------------------------------------------------------- ! ! ... sums a distributed variable ps(dim) over the processors, diff --git a/UtilXlib/util_param.f90 b/UtilXlib/util_param.f90 index 31293b7d4..32bd80695 100644 --- a/UtilXlib/util_param.f90 +++ b/UtilXlib/util_param.f90 @@ -6,12 +6,20 @@ ! in the root directory of the present distribution, ! or http://www.gnu.org/copyleft/gpl.txt . ! +!------------------------------------------------------------------------------! MODULE util_param - - USE parallel_include - - INTEGER, PARAMETER :: DP = selected_real_kind(14,200) - INTEGER, PARAMETER :: stdout = 6 ! unit connected to standard output - CHARACTER(LEN=5 ), PARAMETER :: crash_file = 'CRASH' - +!------------------------------------------------------------------------------! +!! +!! This module is a duplication of the Modules/kind.f90 one, placed here fore +!! convience. +!! +USE parallel_include +! +INTEGER, PARAMETER :: DP = selected_real_kind(14, 200) +INTEGER, PARAMETER :: i8b = selected_int_kind(18) +INTEGER, PARAMETER :: stdout = 6 ! unit connected to standard output +CHARACTER(LEN = 5), PARAMETER :: crash_file = 'CRASH' +! +!------------------------------------------------------------------------------! END MODULE util_param +!------------------------------------------------------------------------------!