mirror of https://gitlab.com/QEF/q-e.git
Vxc_acc - rho_r2g on gpu
This commit is contained in:
parent
5162d496e6
commit
bba0b48bc8
|
@ -23,7 +23,7 @@ MODULE fft_helper_subroutines
|
|||
#endif
|
||||
END INTERFACE
|
||||
PRIVATE
|
||||
PUBLIC :: fftx_threed2oned, fftx_oned2threed
|
||||
PUBLIC :: fftx_threed2oned, fftx_threed2oned_gpu, fftx_oned2threed
|
||||
PUBLIC :: tg_reduce_rho
|
||||
PUBLIC :: tg_get_nnr, tg_get_recip_inc, fftx_ntgrp, fftx_tgpe, &
|
||||
tg_get_group_nr3
|
||||
|
@ -469,6 +469,8 @@ CONTAINS
|
|||
END SUBROUTINE
|
||||
|
||||
SUBROUTINE fftx_threed2oned( desc, vin, vout1, vout2 )
|
||||
!! Copy charge density from 3D array to 1D array in Fourier
|
||||
!! space.
|
||||
USE fft_param
|
||||
USE fft_types, ONLY : fft_type_descriptor
|
||||
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
||||
|
@ -490,7 +492,40 @@ CONTAINS
|
|||
END DO
|
||||
END IF
|
||||
END SUBROUTINE
|
||||
|
||||
|
||||
SUBROUTINE fftx_threed2oned_gpu( desc, vin_d, vout1_d, vout2_d )
|
||||
!! GPU version of \(\texttt{fftx_threed2oned}\).
|
||||
USE fft_param
|
||||
USE fft_types, ONLY : fft_type_descriptor
|
||||
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
||||
complex(DP), INTENT(OUT) :: vout1_d(:)
|
||||
complex(DP), OPTIONAL, INTENT(OUT) :: vout2_d(:)
|
||||
complex(DP), INTENT(IN) :: vin_d(:)
|
||||
INTEGER, POINTER :: nl_d(:), nlm_d(:)
|
||||
COMPLEX(DP) :: fp, fm
|
||||
INTEGER :: ig
|
||||
#if defined(__CUDA) && defined(_OPENACC)
|
||||
attributes(DEVICE) :: nl_d, nlm_d
|
||||
!$acc data deviceptr( vout1_d(:), vout2_d(:), vin_d(:) )
|
||||
nl_d => desc%nl_d
|
||||
nlm_d => desc%nlm_d
|
||||
IF( PRESENT( vout2_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ig=1,desc%ngm
|
||||
fp=vin_d(nl_d(ig))+vin_d(nlm_d(ig))
|
||||
fm=vin_d(nl_d(ig))-vin_d(nlm_d(ig))
|
||||
vout1_d(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
|
||||
vout2_d(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ig=1,desc%ngm
|
||||
vout1_d(ig) = vin_d(nl_d(ig))
|
||||
END DO
|
||||
END IF
|
||||
!$acc end data
|
||||
#endif
|
||||
END SUBROUTINE
|
||||
|
||||
SUBROUTINE fftx_psi2c_gamma( desc, vin, vout1, vout2 )
|
||||
USE fft_param
|
||||
|
|
|
@ -16,7 +16,7 @@ MODULE fft_rho
|
|||
!
|
||||
IMPLICIT NONE
|
||||
PRIVATE
|
||||
PUBLIC :: rho_r2g, rho_g2r
|
||||
PUBLIC :: rho_r2g, rho_g2r, rho_r2g_gpu
|
||||
!
|
||||
INTERFACE rho_g2r
|
||||
MODULE PROCEDURE rho_g2r_1, rho_g2r_2, rho_g2r_sum_components
|
||||
|
@ -24,7 +24,10 @@ MODULE fft_rho
|
|||
!
|
||||
CONTAINS
|
||||
!
|
||||
!-------------------------------------------------------------
|
||||
SUBROUTINE rho_r2g ( desc, rhor, rhog, v )
|
||||
!------------------------------------------------------
|
||||
!! Bring charge density rho from real to G- space
|
||||
USE fft_types, ONLY: fft_type_descriptor
|
||||
USE fft_helper_subroutines, ONLY: fftx_threed2oned
|
||||
!
|
||||
|
@ -93,6 +96,97 @@ CONTAINS
|
|||
|
||||
END SUBROUTINE rho_r2g
|
||||
!
|
||||
!-----------------------------------------------------------------
|
||||
SUBROUTINE rho_r2g_gpu( desc, rhor_d, rhog_d, v_d )
|
||||
!---------------------------------------------------------------
|
||||
!! Bring charge density rho from real to G- space - GPU version.
|
||||
!
|
||||
USE fft_types, ONLY: fft_type_descriptor
|
||||
USE fft_helper_subroutines, ONLY: fftx_threed2oned_gpu
|
||||
!
|
||||
TYPE(fft_type_descriptor), INTENT(in) :: desc
|
||||
REAL(dp), INTENT(in) :: rhor_d(:,:)
|
||||
!! rho in real space
|
||||
COMPLEX(dp), INTENT(OUT):: rhog_d(:,:)
|
||||
!! rho in G-space
|
||||
REAL(dp), OPTIONAL, INTENT(in) :: v_d(:)
|
||||
!
|
||||
! ... local variables
|
||||
!
|
||||
INTEGER :: ir, ig, iss, isup, isdw
|
||||
INTEGER :: nspin
|
||||
COMPLEX(dp):: fp, fm
|
||||
COMPLEX(dp), ALLOCATABLE :: psi_d(:)
|
||||
!
|
||||
!$acc data deviceptr( rhor_d(:,:), rhog_d(:,:), v_d(:) )
|
||||
!
|
||||
nspin= SIZE(rhor_d, 2)
|
||||
!
|
||||
ALLOCATE( psi_d(desc%nnr) )
|
||||
!$acc data create( psi_d(desc%nnr) )
|
||||
!$acc host_data use_device( psi_d )
|
||||
IF( nspin == 1 ) THEN
|
||||
iss=1
|
||||
IF( PRESENT( v_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss)+v_d(ir),0.0_dp,kind=dp)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss),0.0_dp,kind=dp)
|
||||
END DO
|
||||
END IF
|
||||
CALL fwfft('Rho', psi_d, desc )
|
||||
CALL fftx_threed2oned_gpu( desc, psi_d, rhog_d(:,iss) )
|
||||
ELSE
|
||||
IF ( gamma_only ) THEN
|
||||
! nspin/2 = 1 for LSDA, = 2 for noncolinear
|
||||
DO iss=1,nspin/2
|
||||
isup=1+(iss-1)*nspin/2 ! 1 for LSDA, 1 and 3 for noncolinear
|
||||
isdw=2+(iss-1)*nspin/2 ! 2 for LSDA, 2 and 4 for noncolinear
|
||||
IF( PRESENT( v_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,isup)+v_d(ir),rhor_d(ir,isdw)+v_d(ir),kind=dp)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,isup),rhor_d(ir,isdw),kind=dp)
|
||||
END DO
|
||||
END IF
|
||||
CALL fwfft('Rho', psi_d, desc )
|
||||
CALL fftx_threed2oned_gpu( desc, psi_d, rhog_d(:,isup), rhog_d(:,isdw) )
|
||||
END DO
|
||||
ELSE
|
||||
DO iss=1,nspin
|
||||
IF( PRESENT( v_d ) ) THEN
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss)+v_d(ir),0.0_dp,kind=dp)
|
||||
END DO
|
||||
ELSE
|
||||
!$acc parallel loop
|
||||
DO ir=1,desc%nnr
|
||||
psi_d(ir)=CMPLX(rhor_d(ir,iss),0.0_dp,kind=dp)
|
||||
END DO
|
||||
END IF
|
||||
CALL fwfft('Rho', psi_d, desc )
|
||||
CALL fftx_threed2oned_gpu( desc, psi_d, rhog_d(:,iss) )
|
||||
END DO
|
||||
END IF
|
||||
ENDIF
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
DEALLOCATE( psi_d )
|
||||
!
|
||||
!$acc end data
|
||||
!
|
||||
END SUBROUTINE rho_r2g_gpu
|
||||
!
|
||||
!
|
||||
SUBROUTINE rho_g2r_1 ( desc, rhog, rhor )
|
||||
USE fft_types, ONLY: fft_type_descriptor
|
||||
USE fft_helper_subroutines, ONLY: fftx_threed2oned, fftx_oned2threed
|
||||
|
|
|
@ -20,7 +20,7 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
USE noncollin_module, ONLY : domag
|
||||
USE fft_base, ONLY : dfftp
|
||||
USE fft_interfaces, ONLY : fwfft
|
||||
USE fft_rho, ONLY : rho_r2g
|
||||
USE fft_rho, ONLY : rho_r2g, rho_r2g_gpu
|
||||
USE control_flags, ONLY : use_gpu
|
||||
!
|
||||
IMPLICIT NONE
|
||||
|
@ -80,17 +80,17 @@ SUBROUTINE gradcorr( rho, rhog, rho_core, rhog_core, etxc, vtxc, v )
|
|||
IF ( nspin == 4 .AND. domag ) THEN
|
||||
!
|
||||
|
||||
!$acc data copyin(rho) copyout(rhoaux, segni )
|
||||
!$acc host_data use_device( rho, rhoaux, segni )
|
||||
!$acc data copyin(rho) copyout(rhoaux, rhogaux, segni )
|
||||
!$acc host_data use_device( rho, rhoaux, rhogaux, segni )
|
||||
IF ( use_gpu ) CALL compute_rho_gpu( rho, rhoaux, segni, dfftp%nnr )
|
||||
IF (.NOT. use_gpu) CALL compute_rho( rho, rhoaux, segni, dfftp%nnr )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
|
||||
!
|
||||
! ... bring starting rhoaux to G-space
|
||||
!
|
||||
CALL rho_r2g ( dfftp, rhoaux(:,1:nspin0), rhogaux(:,1:nspin0) )
|
||||
IF ( use_gpu ) CALL rho_r2g_gpu( dfftp, rhoaux(:,1:nspin0), rhogaux(:,1:nspin0) )
|
||||
IF (.NOT. use_gpu) CALL rho_r2g( dfftp, rhoaux(:,1:nspin0), rhogaux(:,1:nspin0) )
|
||||
!$acc end host_data
|
||||
!$acc end data
|
||||
!
|
||||
ELSE
|
||||
!
|
||||
|
|
Loading…
Reference in New Issue