mirror of https://gitlab.com/QEF/q-e.git
CP pinned (page-locked) memory support
This commit is contained in:
parent
bc15ade46a
commit
1696bd7822
|
@ -13,6 +13,12 @@
|
|||
#define DEVICEATTR
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
MODULE cp_interfaces
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
@ -973,7 +979,7 @@
|
|||
INTEGER, INTENT(IN) :: ngw, nkb, nbspx_bgrp, nbsp_bgrp
|
||||
COMPLEX(DP), INTENT(IN), DEVICE :: betae( :, : )
|
||||
COMPLEX(DP), INTENT(IN), DEVICE :: c_bgrp( :, : )
|
||||
REAL(DP), INTENT(OUT) :: becdr_bgrp( :, :, : )
|
||||
REAL(DP), INTENT(OUT) PINMEM :: becdr_bgrp( :, :, : )
|
||||
END SUBROUTINE nlsm2_bgrp_gpu_x
|
||||
#endif
|
||||
END INTERFACE
|
||||
|
@ -1033,9 +1039,9 @@
|
|||
SUBROUTINE caldbec_bgrp_gpu_x( eigr, c_bgrp, dbec, idesc )
|
||||
USE kinds, ONLY: DP
|
||||
IMPLICIT NONE
|
||||
COMPLEX(DP), INTENT(IN) :: eigr( :, : )
|
||||
COMPLEX(DP), INTENT(IN), DEVICE :: c_bgrp( :, : )
|
||||
REAL(DP), INTENT(OUT) :: dbec( :, :, :, : )
|
||||
COMPLEX(DP), INTENT(IN) PINMEM :: eigr( :, : )
|
||||
COMPLEX(DP), INTENT(IN), DEVICE :: c_bgrp( :, : )
|
||||
REAL(DP), INTENT(OUT) PINMEM :: dbec( :, :, :, : )
|
||||
INTEGER, INTENT(IN) :: idesc( :, : )
|
||||
END SUBROUTINE caldbec_bgrp_gpu_x
|
||||
#endif
|
||||
|
@ -1097,7 +1103,7 @@
|
|||
SUBROUTINE dbeta_eigr_gpu_x( dbeigr, eigr )
|
||||
USE kinds, ONLY : DP
|
||||
IMPLICIT NONE
|
||||
COMPLEX(DP), INTENT(IN) :: eigr( :, : )
|
||||
COMPLEX(DP), INTENT(IN) PINMEM :: eigr( :, : )
|
||||
COMPLEX(DP), INTENT(OUT), DEVICE :: dbeigr( :, :, :, :)
|
||||
END SUBROUTINE dbeta_eigr_gpu_x
|
||||
#endif
|
||||
|
|
|
@ -9,6 +9,12 @@
|
|||
! Written and revised by Carlo Cavazzoni
|
||||
! Task Groups parallelization by C. Bekas (IBM Research Zurich).
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
!
|
||||
!-------------------------------------------------------------------------
|
||||
SUBROUTINE dforce_x ( i, bec, vkb, c, df, da, v, ldv, ispin, f, n, nspin, v1 )
|
||||
!-----------------------------------------------------------------------
|
||||
|
@ -64,7 +70,7 @@
|
|||
!dir$ attributes align: 4096 :: af, aa, psi, exx_a, exx_b
|
||||
#endif
|
||||
#endif
|
||||
REAL(DP), ALLOCATABLE :: af( :, : ), aa( :, : )
|
||||
REAL(DP), ALLOCATABLE PINMEM :: af( :, : ), aa( :, : )
|
||||
COMPLEX(DP), ALLOCATABLE :: psi(:)
|
||||
REAL(DP) :: tmp1, tmp2 ! Lingzhu Kong
|
||||
REAL(DP), ALLOCATABLE :: exx_a(:), exx_b(:) ! Lingzhu Kong
|
||||
|
@ -371,7 +377,7 @@
|
|||
REAL(DP) :: bec(:,:)
|
||||
COMPLEX(DP), DEVICE :: vkb(:,:)
|
||||
COMPLEX(DP), DEVICE :: c(:,:)
|
||||
COMPLEX(DP) :: df(:), da(:)
|
||||
COMPLEX(DP) PINMEM :: df(:), da(:)
|
||||
INTEGER, INTENT(IN) :: ldv
|
||||
REAL(DP), DEVICE :: v( :, : )
|
||||
INTEGER :: ispin( : )
|
||||
|
@ -388,7 +394,7 @@
|
|||
COMPLEX(DP) :: fp, fm
|
||||
complex(DP), parameter :: ci=(0.0d0,1.0d0)
|
||||
|
||||
REAL(DP), ALLOCATABLE :: af( :, : ), aa( :, : )
|
||||
REAL(DP), ALLOCATABLE PINMEM :: af( :, : ), aa( :, : )
|
||||
REAL(DP), ALLOCATABLE, DEVICE :: af_d( :, : ), aa_d( :, : )
|
||||
COMPLEX(DP), ALLOCATABLE, DEVICE :: psi(:)
|
||||
COMPLEX(DP), ALLOCATABLE, DEVICE :: df_d(:)
|
||||
|
|
|
@ -5,6 +5,11 @@
|
|||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
!
|
||||
!----------------------------------------------------------------------------
|
||||
MODULE cp_main_variables
|
||||
|
@ -27,7 +32,7 @@ MODULE cp_main_variables
|
|||
! ... G = reciprocal lattice vectors
|
||||
! ... R_I = ionic positions
|
||||
!
|
||||
COMPLEX(DP), ALLOCATABLE :: eigr(:,:) ! exp (i G dot R_I)
|
||||
COMPLEX(DP), ALLOCATABLE PINMEM :: eigr(:,:) ! exp (i G dot R_I)
|
||||
!
|
||||
! ... structure factors (summed over atoms of the same kind)
|
||||
!
|
||||
|
@ -40,7 +45,7 @@ MODULE cp_main_variables
|
|||
! ... indexes, positions, and structure factors for the box grid
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: taub(:,:)
|
||||
COMPLEX(DP), ALLOCATABLE :: eigrb(:,:)
|
||||
COMPLEX(DP), ALLOCATABLE PINMEM :: eigrb(:,:)
|
||||
INTEGER, ALLOCATABLE :: irb(:,:)
|
||||
INTEGER, ALLOCATABLE :: iabox(:)
|
||||
INTEGER :: nabox
|
||||
|
@ -54,10 +59,10 @@ MODULE cp_main_variables
|
|||
!
|
||||
REAL(DP), ALLOCATABLE :: bephi(:,:) ! distributed (orhto group)
|
||||
REAL(DP), ALLOCATABLE :: becp_bgrp(:,:) ! distributed becp (band group)
|
||||
REAL(DP), ALLOCATABLE :: bec_bgrp(:,:) ! distributed bec (band group)
|
||||
REAL(DP), ALLOCATABLE PINMEM :: bec_bgrp(:,:) ! distributed bec (band group)
|
||||
REAL(DP), ALLOCATABLE :: bec_d(:,:) ! distributed bec (band group)
|
||||
REAL(DP), ALLOCATABLE :: becdr_bgrp(:,:,:) ! distributed becdr (band group)
|
||||
REAL(DP), ALLOCATABLE :: dbec(:,:,:,:) ! derivative of bec distributed(ortho group)
|
||||
REAL(DP), ALLOCATABLE PINMEM :: becdr_bgrp(:,:,:) ! distributed becdr (band group)
|
||||
REAL(DP), ALLOCATABLE PINMEM :: dbec(:,:,:,:) ! derivative of bec distributed(ortho group)
|
||||
#if defined (__CUDA)
|
||||
ATTRIBUTES( DEVICE ) :: becp_bgrp, bephi, bec_d
|
||||
#endif
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
#define DEVICEATTR
|
||||
#endif
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
!
|
||||
!-----------------------------------------------------------------------
|
||||
subroutine nlsm1us_x ( n, betae, c, becp )
|
||||
|
@ -409,7 +414,7 @@
|
|||
integer, intent(in) :: ngw, nkb, nbspx_bgrp, nbsp_bgrp
|
||||
complex(DP), intent(in), DEVICE :: c_bgrp(:,:)
|
||||
complex(DP), intent(in), DEVICE :: betae(:,:)
|
||||
real(DP), intent(out) :: becdr_bgrp(:,:,:)
|
||||
real(DP), intent(out) PINMEM :: becdr_bgrp(:,:,:)
|
||||
!
|
||||
complex(DP), allocatable, DEVICE :: wrk2(:,:)
|
||||
real(DP), allocatable, DEVICE :: becdr_d(:,:)
|
||||
|
@ -710,7 +715,7 @@ SUBROUTINE dbeta_eigr_gpu_x( dbeigr, eigr )
|
|||
include 'laxlib.fh'
|
||||
!
|
||||
complex(DP), device, intent(out) :: dbeigr( :, :, :, : )
|
||||
complex(DP), intent(in) :: eigr(:,:)
|
||||
complex(DP), intent(in) PINMEM :: eigr(:,:)
|
||||
!
|
||||
integer :: ig, is, iv, ia, l, inl, i, j
|
||||
complex(DP) :: cfact(4)
|
||||
|
@ -912,9 +917,9 @@ SUBROUTINE caldbec_bgrp_gpu_x( eigr, c_bgrp, dbec, idesc )
|
|||
!
|
||||
include 'laxlib.fh'
|
||||
!
|
||||
complex(DP), intent(in), device :: c_bgrp( :, : )
|
||||
complex(DP), intent(in) :: eigr(:,:)
|
||||
real(DP), intent(out) :: dbec( :, :, :, : )
|
||||
complex(DP), intent(in), device :: c_bgrp( :, : )
|
||||
complex(DP), intent(in) PINMEM :: eigr(:,:)
|
||||
real(DP), intent(out) PINMEM :: dbec( :, :, :, : )
|
||||
integer, intent(in) :: idesc( :, : )
|
||||
!
|
||||
integer :: ig, is, iv, ia, l, inl, i, j, ii, iw, iss, nr, ir, istart, nss
|
||||
|
|
|
@ -14,6 +14,12 @@
|
|||
#define DEVICEATTR
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
|
||||
MODULE ortho_module
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
|
@ -229,7 +235,7 @@ CONTAINS
|
|||
|
||||
INTEGER :: i, j, info, nr, nc, ir, ic
|
||||
INTEGER, SAVE :: icnt = 1
|
||||
REAL(DP), ALLOCATABLE :: rhos_h(:,:), s_h(:,:), rhod_h(:)
|
||||
REAL(DP), ALLOCATABLE PINMEM :: rhos_h(:,:), s_h(:,:), rhod_h(:)
|
||||
!
|
||||
! ... Subroutine body
|
||||
!
|
||||
|
|
|
@ -14,6 +14,12 @@
|
|||
#define DEVICEATTR
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
|
||||
MODULE orthogonalize_base
|
||||
|
||||
USE kinds
|
||||
|
@ -956,8 +962,8 @@ CONTAINS
|
|||
INTEGER, INTENT(IN) :: idesc( :, : )
|
||||
COMPLEX(DP) :: cp_bgrp( :, : ), phi( :, : )
|
||||
REAL(DP), INTENT(IN) :: ccc
|
||||
REAL(DP) :: bec_bgrp( :, : ), x0( :, :, : )
|
||||
REAL(DP) :: bephi( :, : )
|
||||
REAL(DP) PINMEM :: bec_bgrp( :, : ), x0( :, :, : )
|
||||
REAL(DP) PINMEM :: bephi( :, : )
|
||||
REAL(DP) :: becp_bgrp( :, : )
|
||||
|
||||
! local variables
|
||||
|
@ -966,7 +972,7 @@ CONTAINS
|
|||
INTEGER :: ipr, ipc, root, i1, i2, nss, istart
|
||||
INTEGER :: ibgrp_i, ibgrp_i_first, nbgrp_i, i_first
|
||||
REAL(DP), ALLOCATABLE :: xd(:,:)
|
||||
REAL(DP), ALLOCATABLE :: bephi_tmp(:,:)
|
||||
REAL(DP), ALLOCATABLE PINMEM :: bephi_tmp(:,:)
|
||||
INTEGER, ALLOCATABLE :: indi(:)
|
||||
INTEGER :: np( 2 ), coor_ip( 2 ), leg_ortho
|
||||
INTEGER :: idesc_ip(LAX_DESC_SIZE)
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
#define DEVICEATTR
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
|
||||
|
||||
SUBROUTINE runcp_uspp_x &
|
||||
( nfi, fccc, ccc, ema0bg, dt2bye, rhos, bec_bgrp, c0_bgrp, c0_d, cm_bgrp, cm_d, fromscra, restart, compute_only_gradient )
|
||||
|
@ -73,7 +79,7 @@
|
|||
#endif
|
||||
real(DP), allocatable :: emadt2(:)
|
||||
real(DP), allocatable :: emaver(:)
|
||||
complex(DP), allocatable :: c2(:), c3(:), c2tmp(:), c3tmp(:)
|
||||
complex(DP), allocatable PINMEM :: c2(:), c3(:), c2tmp(:), c3tmp(:)
|
||||
REAL(DP), ALLOCATABLE :: tg_rhos(:,:), ftmp(:)
|
||||
#if defined (__CUDA)
|
||||
REAL(DP), ALLOCATABLE, DEVICE :: rhos_d(:,:)
|
||||
|
@ -395,7 +401,7 @@
|
|||
REAL(DP) :: verl1, verl2, verl3
|
||||
REAL(DP), ALLOCATABLE:: emadt2(:)
|
||||
REAL(DP), ALLOCATABLE:: emaver(:)
|
||||
COMPLEX(DP), ALLOCATABLE:: c2(:), c3(:)
|
||||
COMPLEX(DP), ALLOCATABLE PINMEM :: c2(:), c3(:)
|
||||
INTEGER :: i
|
||||
INTEGER :: iflag
|
||||
LOGICAL :: ttsde
|
||||
|
@ -404,7 +410,7 @@
|
|||
REAL(DP) :: ei_unp_mem, ei_unp_wfc
|
||||
COMPLEX(DP) :: intermed3
|
||||
REAL(DP), ALLOCATABLE :: occ(:)
|
||||
COMPLEX(DP), ALLOCATABLE :: c4(:), c5(:)
|
||||
COMPLEX(DP), ALLOCATABLE PINMEM :: c4(:), c5(:)
|
||||
!
|
||||
! ... Controlling on sic applicability
|
||||
!
|
||||
|
|
|
@ -5,6 +5,12 @@
|
|||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
!
|
||||
!------------------------------------------------------------------------------!
|
||||
MODULE electrons_base
|
||||
!------------------------------------------------------------------------------!
|
||||
|
@ -69,7 +75,7 @@
|
|||
!! occupation numbers ( at gamma )
|
||||
INTEGER, ALLOCATABLE :: ispin_bgrp(:)
|
||||
!! spin of each state
|
||||
INTEGER, ALLOCATABLE :: ibgrp_g2l(:)
|
||||
INTEGER, ALLOCATABLE PINMEM :: ibgrp_g2l(:)
|
||||
!! local index of the i-th global band index
|
||||
#if defined (__CUDA)
|
||||
ATTRIBUTES( DEVICE ) :: f_d
|
||||
|
|
|
@ -6,6 +6,12 @@
|
|||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
|
||||
!=----------------------------------------------------------------------------=!
|
||||
MODULE wavefunctions
|
||||
!=----------------------------------------------------------------------------=!
|
||||
|
@ -39,8 +45,8 @@
|
|||
! distributed over gvector and bands
|
||||
!
|
||||
!dir$ attributes align: 4096 :: c0_bgrp, cm_bgrp, phi
|
||||
COMPLEX(DP), ALLOCATABLE :: c0_bgrp(:,:) ! wave functions at time t
|
||||
COMPLEX(DP), ALLOCATABLE :: cm_bgrp(:,:) ! wave functions at time t-delta t
|
||||
COMPLEX(DP), ALLOCATABLE PINMEM :: c0_bgrp(:,:) ! wave functions at time t
|
||||
COMPLEX(DP), ALLOCATABLE PINMEM :: cm_bgrp(:,:) ! wave functions at time t-delta t
|
||||
COMPLEX(DP), ALLOCATABLE :: phi(:,:) ! |phi> = s'|c0> = |c0> + sum q_ij |i><j|c0>
|
||||
COMPLEX(DP), ALLOCATABLE :: c0_d(:,:) ! wave functions at time t
|
||||
COMPLEX(DP), ALLOCATABLE :: cm_d(:,:) ! wave functions at time t-delta t
|
||||
|
|
|
@ -5,6 +5,13 @@
|
|||
! in the root directory of the present distribution,
|
||||
! or http://www.gnu.org/copyleft/gpl.txt .
|
||||
!
|
||||
|
||||
#if defined(__CUDA)
|
||||
#define PINMEM ,PINNED
|
||||
#else
|
||||
#define PINMEM
|
||||
#endif
|
||||
|
||||
MODULE uspp_param
|
||||
!
|
||||
! ... Ultrasoft and Norm-Conserving pseudopotential parameters
|
||||
|
@ -122,7 +129,7 @@ MODULE uspp
|
|||
INTEGER :: nkb, &! total number of beta functions, with struct.fact.
|
||||
nkbus ! as above, for US-PP only
|
||||
!
|
||||
INTEGER, ALLOCATABLE ::&
|
||||
INTEGER, ALLOCATABLE PINMEM ::&
|
||||
indv(:,:), &! indes linking atomic beta's to beta's in the solid
|
||||
nhtol(:,:), &! correspondence n <-> angular momentum l
|
||||
nhtolm(:,:), &! correspondence n <-> combined lm index for (l,m)
|
||||
|
@ -145,13 +152,14 @@ MODULE uspp
|
|||
nlcc_any=.FALSE. ! if .TRUE. at least one pseudo has core corrections
|
||||
!
|
||||
!FIXME use !$acc declare create(vkb) to create and delete it automatically in the device
|
||||
COMPLEX(DP), ALLOCATABLE, TARGET :: &
|
||||
! be carefull cp still uses vkb_d for device
|
||||
COMPLEX(DP), ALLOCATABLE, TARGET PINMEN :: &
|
||||
vkb(:,:) ! all beta functions in reciprocal space
|
||||
REAL(DP), ALLOCATABLE :: &
|
||||
becsum(:,:,:) ! \sum_i f(i) <psi(i)|beta_l><beta_m|psi(i)>
|
||||
REAL(DP), ALLOCATABLE :: &
|
||||
ebecsum(:,:,:) ! \sum_i f(i) et(i) <psi(i)|beta_l><beta_m|psi(i)>
|
||||
REAL(DP), ALLOCATABLE :: &
|
||||
REAL(DP), ALLOCATABLE PINMEM :: &
|
||||
dvan(:,:,:), &! the D functions of the solid
|
||||
deeq(:,:,:,:), &! the integral of V_eff and Q_{nm}
|
||||
qq_nt(:,:,:), &! the integral of q functions in the solid (ONE PER NTYP) used to be the qq array
|
||||
|
@ -184,9 +192,9 @@ MODULE uspp
|
|||
! spin-orbit coupling: qq and dvan are complex, qq has additional spin index
|
||||
! noncolinear magnetism: deeq is complex (even in absence of spin-orbit)
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: &
|
||||
REAL(DP), ALLOCATABLE PINMEM :: &
|
||||
beta(:,:,:) ! beta functions for CP (without struct.factor)
|
||||
REAL(DP), ALLOCATABLE :: &
|
||||
REAL(DP), ALLOCATABLE PINMEM :: &
|
||||
dbeta(:,:,:,:,:) ! derivative of beta functions w.r.t. cell for CP (without struct.factor)
|
||||
!
|
||||
CONTAINS
|
||||
|
|
Loading…
Reference in New Issue