mirror of https://gitlab.com/QEF/q-e.git
Some more comments
This commit is contained in:
parent
076280880c
commit
38cb7372f6
|
@ -98,8 +98,6 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
IF (isign < 0) THEN
|
IF (isign < 0) THEN
|
||||||
!print *,"exec cufft FWD",nz,ldz,nsl
|
|
||||||
!call flush(6)
|
|
||||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_FORWARD )
|
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_FORWARD )
|
||||||
tscale = 1.0_DP / nz
|
tscale = 1.0_DP / nz
|
||||||
IF (is_inplace) THEN
|
IF (is_inplace) THEN
|
||||||
|
@ -114,12 +112,10 @@
|
||||||
END DO
|
END DO
|
||||||
END IF
|
END IF
|
||||||
ELSE IF (isign > 0) THEN
|
ELSE IF (isign > 0) THEN
|
||||||
!print *,"exec cufft INV",nz,ldz,nsl
|
|
||||||
!call flush(6)
|
|
||||||
IF (is_inplace) THEN
|
IF (is_inplace) THEN
|
||||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_INVERSE ) !CUFFT_FORWARD )
|
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_INVERSE )
|
||||||
ELSE
|
ELSE
|
||||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), cout_d(1), CUFFT_INVERSE ) !CUFFT_FORWARD )
|
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), cout_d(1), CUFFT_INVERSE )
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
|
|
||||||
|
@ -163,10 +159,6 @@
|
||||||
DATA_DIM, STRIDE, DIST, &
|
DATA_DIM, STRIDE, DIST, &
|
||||||
CUFFT_Z2Z, BATCH )
|
CUFFT_Z2Z, BATCH )
|
||||||
|
|
||||||
#if defined(__CUDA_DEBUG)
|
|
||||||
print *,"INIT CUFFT Z PLAN: ",nz,"x",nsl,"x",ldz
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef TRACK_FLOPS
|
#ifdef TRACK_FLOPS
|
||||||
zflops( icurrent ) = 5.0d0 * REAL( nz ) * log( REAL( nz ) )/log( 2.d0 )
|
zflops( icurrent ) = 5.0d0 * REAL( nz ) * log( REAL( nz ) )/log( 2.d0 )
|
||||||
#endif
|
#endif
|
||||||
|
@ -428,10 +420,6 @@
|
||||||
DATA_DIM, STRIDE, DIST, &
|
DATA_DIM, STRIDE, DIST, &
|
||||||
CUFFT_Z2Z, BATCH )
|
CUFFT_Z2Z, BATCH )
|
||||||
|
|
||||||
#if defined(__CUDA_DEBUG)
|
|
||||||
print *,"INIT CUFFT ALL_XY PLAN: ",nx,"x",ny,"x",nzl,"ldx:",ldx,"batch:",batch_1,batch_2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
INTEGER, PARAMETER :: RANK=1
|
INTEGER, PARAMETER :: RANK=1
|
||||||
INTEGER :: FFT_DIM_X(RANK), DATA_DIM_X(RANK), FFT_DIM_Y(RANK), DATA_DIM_Y(RANK)
|
INTEGER :: FFT_DIM_X(RANK), DATA_DIM_X(RANK), FFT_DIM_Y(RANK), DATA_DIM_Y(RANK)
|
||||||
|
@ -455,9 +443,6 @@
|
||||||
IF( cufft_plan_y( 1, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(1,icurrent) )
|
IF( cufft_plan_y( 1, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(1,icurrent) )
|
||||||
IF( cufft_plan_y( 2, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(2,icurrent) )
|
IF( cufft_plan_y( 2, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(2,icurrent) )
|
||||||
|
|
||||||
#if defined(__CUDA_DEBUG)
|
|
||||||
print *,"INIT CUFFT XY PLAN: ",nx,"x",ny,"x",nzl,"ldx:",ldx,"batch:",batch_1,batch_2
|
|
||||||
#endif
|
|
||||||
|
|
||||||
istat = cufftPlanMany( cufft_plan_x( icurrent), RANK, FFT_DIM_X, &
|
istat = cufftPlanMany( cufft_plan_x( icurrent), RANK, FFT_DIM_X, &
|
||||||
DATA_DIM_X, STRIDE_X, DIST_X, &
|
DATA_DIM_X, STRIDE_X, DIST_X, &
|
||||||
|
@ -567,11 +552,9 @@
|
||||||
DO i=1, ldx*ldy*ldz*howmany
|
DO i=1, ldx*ldy*ldz*howmany
|
||||||
f_d( i ) = f_d( i ) * tscale
|
f_d( i ) = f_d( i ) * tscale
|
||||||
END DO
|
END DO
|
||||||
! call ZDSCAL( nx * ny * nz, tscale, f_d(1), 1)
|
|
||||||
|
|
||||||
ELSE IF( isign > 0 ) THEN
|
ELSE IF( isign > 0 ) THEN
|
||||||
|
|
||||||
! call FFTW_INPLACE_DRV_3D( bw_plan(ip), 1, f_d(1), 1, 1 )
|
|
||||||
istat = cufftExecZ2Z( cufft_plan_3d(ip), f_d(1), f_d(1), CUFFT_INVERSE )
|
istat = cufftExecZ2Z( cufft_plan_3d(ip), f_d(1), f_d(1), CUFFT_INVERSE )
|
||||||
|
|
||||||
END IF
|
END IF
|
||||||
|
@ -642,6 +625,9 @@
|
||||||
! This routine is implemented only for fftw, essl, acml
|
! This routine is implemented only for fftw, essl, acml
|
||||||
! If not implemented, cfft3d is called instead
|
! If not implemented, cfft3d is called instead
|
||||||
!
|
!
|
||||||
|
! NB: this version is much slower than the 3D FFT of the
|
||||||
|
! entire data.
|
||||||
|
!
|
||||||
!----------------------------------------------------------------------
|
!----------------------------------------------------------------------
|
||||||
!
|
!
|
||||||
implicit none
|
implicit none
|
||||||
|
|
|
@ -153,21 +153,27 @@ MODULE fft_types
|
||||||
|
|
||||||
INTEGER :: grid_id
|
INTEGER :: grid_id
|
||||||
#if defined(__CUDA)
|
#if defined(__CUDA)
|
||||||
|
! These CUDA streams are used in the 1D+1D+1D GPU implementation
|
||||||
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_scatter_yz
|
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_scatter_yz
|
||||||
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_many
|
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_many
|
||||||
INTEGER :: nstream_many = 16
|
! These CUDA streams (and events) are used in the 1D+2D GPU implementation
|
||||||
|
|
||||||
INTEGER(kind=cuda_stream_kind) :: a2a_comp
|
INTEGER(kind=cuda_stream_kind) :: a2a_comp
|
||||||
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: bstreams
|
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: bstreams
|
||||||
TYPE(cudaEvent), allocatable, dimension(:) :: bevents
|
TYPE(cudaEvent), allocatable, dimension(:) :: bevents
|
||||||
|
!
|
||||||
|
! These variables define the dimension of batches and subbatches in
|
||||||
|
! * the 1D+2D GPU implementation:
|
||||||
INTEGER :: batchsize = 16 ! how many ffts to batch together
|
INTEGER :: batchsize = 16 ! how many ffts to batch together
|
||||||
INTEGER :: subbatchsize = 4 ! size of subbatch for pipelining
|
INTEGER :: subbatchsize = 4 ! size of subbatch for pipelining
|
||||||
|
! * the 1D+1D+1D implementation:
|
||||||
|
INTEGER :: nstream_many = 16 ! this should be replaced by batchsize
|
||||||
|
! since it has the same meaning.
|
||||||
|
!
|
||||||
#if defined(__IPC)
|
#if defined(__IPC)
|
||||||
INTEGER :: IPC_PEER(16) ! This is used for IPC that is not implemented yet.
|
INTEGER :: IPC_PEER(16) ! This is used for IPC that is not implemented yet.
|
||||||
#endif
|
#endif
|
||||||
INTEGER, ALLOCATABLE :: srh(:,:) ! Isend/recv handles by subbatch
|
INTEGER, ALLOCATABLE :: srh(:,:) ! These are non-blocking send/recv handles that are used to
|
||||||
|
! overlap computation and communication of FFT subbatches.
|
||||||
#endif
|
#endif
|
||||||
COMPLEX(DP), ALLOCATABLE, DIMENSION(:) :: aux
|
COMPLEX(DP), ALLOCATABLE, DIMENSION(:) :: aux
|
||||||
#if defined(__FFT_OPENMP_TASKS)
|
#if defined(__FFT_OPENMP_TASKS)
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit e5392b772497f8597f6c3b2851e0c17da756bb64
|
Subproject commit a9f7a1b01ab10e00cae22a5dca4f73ebf7e4917d
|
|
@ -1 +1 @@
|
||||||
Subproject commit 6fef49bcfc4a380432f15734ed0ca1f0b0388977
|
Subproject commit 819745f5849de5c9de516be133ab206691738257
|
Loading…
Reference in New Issue