CP pinned (page-locked) memory support

This commit is contained in:
Sergio Orlandini 2021-10-05 09:19:46 +02:00 committed by Pietro Delugas
parent bc15ade46a
commit 1696bd7822
10 changed files with 93 additions and 33 deletions

View File

@ -13,6 +13,12 @@
#define DEVICEATTR
#endif
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
!=----------------------------------------------------------------------------=!
MODULE cp_interfaces
!=----------------------------------------------------------------------------=!
@ -973,7 +979,7 @@
INTEGER, INTENT(IN) :: ngw, nkb, nbspx_bgrp, nbsp_bgrp
COMPLEX(DP), INTENT(IN), DEVICE :: betae( :, : )
COMPLEX(DP), INTENT(IN), DEVICE :: c_bgrp( :, : )
REAL(DP), INTENT(OUT) :: becdr_bgrp( :, :, : )
REAL(DP), INTENT(OUT) PINMEM :: becdr_bgrp( :, :, : )
END SUBROUTINE nlsm2_bgrp_gpu_x
#endif
END INTERFACE
@ -1033,9 +1039,9 @@
SUBROUTINE caldbec_bgrp_gpu_x( eigr, c_bgrp, dbec, idesc )
USE kinds, ONLY: DP
IMPLICIT NONE
COMPLEX(DP), INTENT(IN) :: eigr( :, : )
COMPLEX(DP), INTENT(IN), DEVICE :: c_bgrp( :, : )
REAL(DP), INTENT(OUT) :: dbec( :, :, :, : )
COMPLEX(DP), INTENT(IN) PINMEM :: eigr( :, : )
COMPLEX(DP), INTENT(IN), DEVICE :: c_bgrp( :, : )
REAL(DP), INTENT(OUT) PINMEM :: dbec( :, :, :, : )
INTEGER, INTENT(IN) :: idesc( :, : )
END SUBROUTINE caldbec_bgrp_gpu_x
#endif
@ -1097,7 +1103,7 @@
SUBROUTINE dbeta_eigr_gpu_x( dbeigr, eigr )
USE kinds, ONLY : DP
IMPLICIT NONE
COMPLEX(DP), INTENT(IN) :: eigr( :, : )
COMPLEX(DP), INTENT(IN) PINMEM :: eigr( :, : )
COMPLEX(DP), INTENT(OUT), DEVICE :: dbeigr( :, :, :, :)
END SUBROUTINE dbeta_eigr_gpu_x
#endif

View File

@ -9,6 +9,12 @@
! Written and revised by Carlo Cavazzoni
! Task Groups parallelization by C. Bekas (IBM Research Zurich).
!
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
!
!-------------------------------------------------------------------------
SUBROUTINE dforce_x ( i, bec, vkb, c, df, da, v, ldv, ispin, f, n, nspin, v1 )
!-----------------------------------------------------------------------
@ -64,7 +70,7 @@
!dir$ attributes align: 4096 :: af, aa, psi, exx_a, exx_b
#endif
#endif
REAL(DP), ALLOCATABLE :: af( :, : ), aa( :, : )
REAL(DP), ALLOCATABLE PINMEM :: af( :, : ), aa( :, : )
COMPLEX(DP), ALLOCATABLE :: psi(:)
REAL(DP) :: tmp1, tmp2 ! Lingzhu Kong
REAL(DP), ALLOCATABLE :: exx_a(:), exx_b(:) ! Lingzhu Kong
@ -371,7 +377,7 @@
REAL(DP) :: bec(:,:)
COMPLEX(DP), DEVICE :: vkb(:,:)
COMPLEX(DP), DEVICE :: c(:,:)
COMPLEX(DP) :: df(:), da(:)
COMPLEX(DP) PINMEM :: df(:), da(:)
INTEGER, INTENT(IN) :: ldv
REAL(DP), DEVICE :: v( :, : )
INTEGER :: ispin( : )
@ -388,7 +394,7 @@
COMPLEX(DP) :: fp, fm
complex(DP), parameter :: ci=(0.0d0,1.0d0)
REAL(DP), ALLOCATABLE :: af( :, : ), aa( :, : )
REAL(DP), ALLOCATABLE PINMEM :: af( :, : ), aa( :, : )
REAL(DP), ALLOCATABLE, DEVICE :: af_d( :, : ), aa_d( :, : )
COMPLEX(DP), ALLOCATABLE, DEVICE :: psi(:)
COMPLEX(DP), ALLOCATABLE, DEVICE :: df_d(:)

View File

@ -5,6 +5,11 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
!
!----------------------------------------------------------------------------
MODULE cp_main_variables
@ -27,7 +32,7 @@ MODULE cp_main_variables
! ... G = reciprocal lattice vectors
! ... R_I = ionic positions
!
COMPLEX(DP), ALLOCATABLE :: eigr(:,:) ! exp (i G dot R_I)
COMPLEX(DP), ALLOCATABLE PINMEM :: eigr(:,:) ! exp (i G dot R_I)
!
! ... structure factors (summed over atoms of the same kind)
!
@ -40,7 +45,7 @@ MODULE cp_main_variables
! ... indexes, positions, and structure factors for the box grid
!
REAL(DP), ALLOCATABLE :: taub(:,:)
COMPLEX(DP), ALLOCATABLE :: eigrb(:,:)
COMPLEX(DP), ALLOCATABLE PINMEM :: eigrb(:,:)
INTEGER, ALLOCATABLE :: irb(:,:)
INTEGER, ALLOCATABLE :: iabox(:)
INTEGER :: nabox
@ -54,10 +59,10 @@ MODULE cp_main_variables
!
REAL(DP), ALLOCATABLE :: bephi(:,:) ! distributed (orhto group)
REAL(DP), ALLOCATABLE :: becp_bgrp(:,:) ! distributed becp (band group)
REAL(DP), ALLOCATABLE :: bec_bgrp(:,:) ! distributed bec (band group)
REAL(DP), ALLOCATABLE PINMEM :: bec_bgrp(:,:) ! distributed bec (band group)
REAL(DP), ALLOCATABLE :: bec_d(:,:) ! distributed bec (band group)
REAL(DP), ALLOCATABLE :: becdr_bgrp(:,:,:) ! distributed becdr (band group)
REAL(DP), ALLOCATABLE :: dbec(:,:,:,:) ! derivative of bec distributed(ortho group)
REAL(DP), ALLOCATABLE PINMEM :: becdr_bgrp(:,:,:) ! distributed becdr (band group)
REAL(DP), ALLOCATABLE PINMEM :: dbec(:,:,:,:) ! derivative of bec distributed(ortho group)
#if defined (__CUDA)
ATTRIBUTES( DEVICE ) :: becp_bgrp, bephi, bec_d
#endif

View File

@ -11,6 +11,11 @@
#define DEVICEATTR
#endif
!
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
!
!-----------------------------------------------------------------------
subroutine nlsm1us_x ( n, betae, c, becp )
@ -409,7 +414,7 @@
integer, intent(in) :: ngw, nkb, nbspx_bgrp, nbsp_bgrp
complex(DP), intent(in), DEVICE :: c_bgrp(:,:)
complex(DP), intent(in), DEVICE :: betae(:,:)
real(DP), intent(out) :: becdr_bgrp(:,:,:)
real(DP), intent(out) PINMEM :: becdr_bgrp(:,:,:)
!
complex(DP), allocatable, DEVICE :: wrk2(:,:)
real(DP), allocatable, DEVICE :: becdr_d(:,:)
@ -710,7 +715,7 @@ SUBROUTINE dbeta_eigr_gpu_x( dbeigr, eigr )
include 'laxlib.fh'
!
complex(DP), device, intent(out) :: dbeigr( :, :, :, : )
complex(DP), intent(in) :: eigr(:,:)
complex(DP), intent(in) PINMEM :: eigr(:,:)
!
integer :: ig, is, iv, ia, l, inl, i, j
complex(DP) :: cfact(4)
@ -912,9 +917,9 @@ SUBROUTINE caldbec_bgrp_gpu_x( eigr, c_bgrp, dbec, idesc )
!
include 'laxlib.fh'
!
complex(DP), intent(in), device :: c_bgrp( :, : )
complex(DP), intent(in) :: eigr(:,:)
real(DP), intent(out) :: dbec( :, :, :, : )
complex(DP), intent(in), device :: c_bgrp( :, : )
complex(DP), intent(in) PINMEM :: eigr(:,:)
real(DP), intent(out) PINMEM :: dbec( :, :, :, : )
integer, intent(in) :: idesc( :, : )
!
integer :: ig, is, iv, ia, l, inl, i, j, ii, iw, iss, nr, ir, istart, nss

View File

@ -14,6 +14,12 @@
#define DEVICEATTR
#endif
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
MODULE ortho_module
!
#if defined(__CUDA)
@ -229,7 +235,7 @@ CONTAINS
INTEGER :: i, j, info, nr, nc, ir, ic
INTEGER, SAVE :: icnt = 1
REAL(DP), ALLOCATABLE :: rhos_h(:,:), s_h(:,:), rhod_h(:)
REAL(DP), ALLOCATABLE PINMEM :: rhos_h(:,:), s_h(:,:), rhod_h(:)
!
! ... Subroutine body
!

View File

@ -14,6 +14,12 @@
#define DEVICEATTR
#endif
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
MODULE orthogonalize_base
USE kinds
@ -956,8 +962,8 @@ CONTAINS
INTEGER, INTENT(IN) :: idesc( :, : )
COMPLEX(DP) :: cp_bgrp( :, : ), phi( :, : )
REAL(DP), INTENT(IN) :: ccc
REAL(DP) :: bec_bgrp( :, : ), x0( :, :, : )
REAL(DP) :: bephi( :, : )
REAL(DP) PINMEM :: bec_bgrp( :, : ), x0( :, :, : )
REAL(DP) PINMEM :: bephi( :, : )
REAL(DP) :: becp_bgrp( :, : )
! local variables
@ -966,7 +972,7 @@ CONTAINS
INTEGER :: ipr, ipc, root, i1, i2, nss, istart
INTEGER :: ibgrp_i, ibgrp_i_first, nbgrp_i, i_first
REAL(DP), ALLOCATABLE :: xd(:,:)
REAL(DP), ALLOCATABLE :: bephi_tmp(:,:)
REAL(DP), ALLOCATABLE PINMEM :: bephi_tmp(:,:)
INTEGER, ALLOCATABLE :: indi(:)
INTEGER :: np( 2 ), coor_ip( 2 ), leg_ortho
INTEGER :: idesc_ip(LAX_DESC_SIZE)

View File

@ -16,6 +16,12 @@
#define DEVICEATTR
#endif
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
SUBROUTINE runcp_uspp_x &
( nfi, fccc, ccc, ema0bg, dt2bye, rhos, bec_bgrp, c0_bgrp, c0_d, cm_bgrp, cm_d, fromscra, restart, compute_only_gradient )
@ -73,7 +79,7 @@
#endif
real(DP), allocatable :: emadt2(:)
real(DP), allocatable :: emaver(:)
complex(DP), allocatable :: c2(:), c3(:), c2tmp(:), c3tmp(:)
complex(DP), allocatable PINMEM :: c2(:), c3(:), c2tmp(:), c3tmp(:)
REAL(DP), ALLOCATABLE :: tg_rhos(:,:), ftmp(:)
#if defined (__CUDA)
REAL(DP), ALLOCATABLE, DEVICE :: rhos_d(:,:)
@ -395,7 +401,7 @@
REAL(DP) :: verl1, verl2, verl3
REAL(DP), ALLOCATABLE:: emadt2(:)
REAL(DP), ALLOCATABLE:: emaver(:)
COMPLEX(DP), ALLOCATABLE:: c2(:), c3(:)
COMPLEX(DP), ALLOCATABLE PINMEM :: c2(:), c3(:)
INTEGER :: i
INTEGER :: iflag
LOGICAL :: ttsde
@ -404,7 +410,7 @@
REAL(DP) :: ei_unp_mem, ei_unp_wfc
COMPLEX(DP) :: intermed3
REAL(DP), ALLOCATABLE :: occ(:)
COMPLEX(DP), ALLOCATABLE :: c4(:), c5(:)
COMPLEX(DP), ALLOCATABLE PINMEM :: c4(:), c5(:)
!
! ... Controlling on sic applicability
!

View File

@ -5,6 +5,12 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
!
!------------------------------------------------------------------------------!
MODULE electrons_base
!------------------------------------------------------------------------------!
@ -69,7 +75,7 @@
!! occupation numbers ( at gamma )
INTEGER, ALLOCATABLE :: ispin_bgrp(:)
!! spin of each state
INTEGER, ALLOCATABLE :: ibgrp_g2l(:)
INTEGER, ALLOCATABLE PINMEM :: ibgrp_g2l(:)
!! local index of the i-th global band index
#if defined (__CUDA)
ATTRIBUTES( DEVICE ) :: f_d

View File

@ -6,6 +6,12 @@
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
!=----------------------------------------------------------------------------=!
MODULE wavefunctions
!=----------------------------------------------------------------------------=!
@ -39,8 +45,8 @@
! distributed over gvector and bands
!
!dir$ attributes align: 4096 :: c0_bgrp, cm_bgrp, phi
COMPLEX(DP), ALLOCATABLE :: c0_bgrp(:,:) ! wave functions at time t
COMPLEX(DP), ALLOCATABLE :: cm_bgrp(:,:) ! wave functions at time t-delta t
COMPLEX(DP), ALLOCATABLE PINMEM :: c0_bgrp(:,:) ! wave functions at time t
COMPLEX(DP), ALLOCATABLE PINMEM :: cm_bgrp(:,:) ! wave functions at time t-delta t
COMPLEX(DP), ALLOCATABLE :: phi(:,:) ! |phi> = s'|c0> = |c0> + sum q_ij |i><j|c0>
COMPLEX(DP), ALLOCATABLE :: c0_d(:,:) ! wave functions at time t
COMPLEX(DP), ALLOCATABLE :: cm_d(:,:) ! wave functions at time t-delta t

View File

@ -5,6 +5,13 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined(__CUDA)
#define PINMEM ,PINNED
#else
#define PINMEM
#endif
MODULE uspp_param
!
! ... Ultrasoft and Norm-Conserving pseudopotential parameters
@ -122,7 +129,7 @@ MODULE uspp
INTEGER :: nkb, &! total number of beta functions, with struct.fact.
nkbus ! as above, for US-PP only
!
INTEGER, ALLOCATABLE ::&
INTEGER, ALLOCATABLE PINMEM ::&
indv(:,:), &! indes linking atomic beta's to beta's in the solid
nhtol(:,:), &! correspondence n <-> angular momentum l
nhtolm(:,:), &! correspondence n <-> combined lm index for (l,m)
@ -145,13 +152,14 @@ MODULE uspp
nlcc_any=.FALSE. ! if .TRUE. at least one pseudo has core corrections
!
!FIXME use !$acc declare create(vkb) to create and delete it automatically in the device
COMPLEX(DP), ALLOCATABLE, TARGET :: &
! be carefull cp still uses vkb_d for device
COMPLEX(DP), ALLOCATABLE, TARGET PINMEN :: &
vkb(:,:) ! all beta functions in reciprocal space
REAL(DP), ALLOCATABLE :: &
becsum(:,:,:) ! \sum_i f(i) <psi(i)|beta_l><beta_m|psi(i)>
REAL(DP), ALLOCATABLE :: &
ebecsum(:,:,:) ! \sum_i f(i) et(i) <psi(i)|beta_l><beta_m|psi(i)>
REAL(DP), ALLOCATABLE :: &
REAL(DP), ALLOCATABLE PINMEM :: &
dvan(:,:,:), &! the D functions of the solid
deeq(:,:,:,:), &! the integral of V_eff and Q_{nm}
qq_nt(:,:,:), &! the integral of q functions in the solid (ONE PER NTYP) used to be the qq array
@ -184,9 +192,9 @@ MODULE uspp
! spin-orbit coupling: qq and dvan are complex, qq has additional spin index
! noncolinear magnetism: deeq is complex (even in absence of spin-orbit)
!
REAL(DP), ALLOCATABLE :: &
REAL(DP), ALLOCATABLE PINMEM :: &
beta(:,:,:) ! beta functions for CP (without struct.factor)
REAL(DP), ALLOCATABLE :: &
REAL(DP), ALLOCATABLE PINMEM :: &
dbeta(:,:,:,:,:) ! derivative of beta functions w.r.t. cell for CP (without struct.factor)
!
CONTAINS