mirror of https://gitlab.com/QEF/q-e.git
Some more comments
This commit is contained in:
parent
076280880c
commit
38cb7372f6
|
@ -98,8 +98,6 @@
|
|||
#endif
|
||||
|
||||
IF (isign < 0) THEN
|
||||
!print *,"exec cufft FWD",nz,ldz,nsl
|
||||
!call flush(6)
|
||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_FORWARD )
|
||||
tscale = 1.0_DP / nz
|
||||
IF (is_inplace) THEN
|
||||
|
@ -114,12 +112,10 @@
|
|||
END DO
|
||||
END IF
|
||||
ELSE IF (isign > 0) THEN
|
||||
!print *,"exec cufft INV",nz,ldz,nsl
|
||||
!call flush(6)
|
||||
IF (is_inplace) THEN
|
||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_INVERSE ) !CUFFT_FORWARD )
|
||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_INVERSE )
|
||||
ELSE
|
||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), cout_d(1), CUFFT_INVERSE ) !CUFFT_FORWARD )
|
||||
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), cout_d(1), CUFFT_INVERSE )
|
||||
END IF
|
||||
END IF
|
||||
|
||||
|
@ -163,10 +159,6 @@
|
|||
DATA_DIM, STRIDE, DIST, &
|
||||
CUFFT_Z2Z, BATCH )
|
||||
|
||||
#if defined(__CUDA_DEBUG)
|
||||
print *,"INIT CUFFT Z PLAN: ",nz,"x",nsl,"x",ldz
|
||||
#endif
|
||||
|
||||
#ifdef TRACK_FLOPS
|
||||
zflops( icurrent ) = 5.0d0 * REAL( nz ) * log( REAL( nz ) )/log( 2.d0 )
|
||||
#endif
|
||||
|
@ -428,10 +420,6 @@
|
|||
DATA_DIM, STRIDE, DIST, &
|
||||
CUFFT_Z2Z, BATCH )
|
||||
|
||||
#if defined(__CUDA_DEBUG)
|
||||
print *,"INIT CUFFT ALL_XY PLAN: ",nx,"x",ny,"x",nzl,"ldx:",ldx,"batch:",batch_1,batch_2
|
||||
#endif
|
||||
|
||||
#else
|
||||
INTEGER, PARAMETER :: RANK=1
|
||||
INTEGER :: FFT_DIM_X(RANK), DATA_DIM_X(RANK), FFT_DIM_Y(RANK), DATA_DIM_Y(RANK)
|
||||
|
@ -455,9 +443,6 @@
|
|||
IF( cufft_plan_y( 1, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(1,icurrent) )
|
||||
IF( cufft_plan_y( 2, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(2,icurrent) )
|
||||
|
||||
#if defined(__CUDA_DEBUG)
|
||||
print *,"INIT CUFFT XY PLAN: ",nx,"x",ny,"x",nzl,"ldx:",ldx,"batch:",batch_1,batch_2
|
||||
#endif
|
||||
|
||||
istat = cufftPlanMany( cufft_plan_x( icurrent), RANK, FFT_DIM_X, &
|
||||
DATA_DIM_X, STRIDE_X, DIST_X, &
|
||||
|
@ -567,11 +552,9 @@
|
|||
DO i=1, ldx*ldy*ldz*howmany
|
||||
f_d( i ) = f_d( i ) * tscale
|
||||
END DO
|
||||
! call ZDSCAL( nx * ny * nz, tscale, f_d(1), 1)
|
||||
|
||||
ELSE IF( isign > 0 ) THEN
|
||||
|
||||
! call FFTW_INPLACE_DRV_3D( bw_plan(ip), 1, f_d(1), 1, 1 )
|
||||
istat = cufftExecZ2Z( cufft_plan_3d(ip), f_d(1), f_d(1), CUFFT_INVERSE )
|
||||
|
||||
END IF
|
||||
|
@ -642,6 +625,9 @@
|
|||
! This routine is implemented only for fftw, essl, acml
|
||||
! If not implemented, cfft3d is called instead
|
||||
!
|
||||
! NB: this version is much slower than the 3D FFT of the
|
||||
! entire data.
|
||||
!
|
||||
!----------------------------------------------------------------------
|
||||
!
|
||||
implicit none
|
||||
|
|
|
@ -153,21 +153,27 @@ MODULE fft_types
|
|||
|
||||
INTEGER :: grid_id
|
||||
#if defined(__CUDA)
|
||||
! These CUDA streams are used in the 1D+1D+1D GPU implementation
|
||||
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_scatter_yz
|
||||
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_many
|
||||
INTEGER :: nstream_many = 16
|
||||
|
||||
! These CUDA streams (and events) are used in the 1D+2D GPU implementation
|
||||
INTEGER(kind=cuda_stream_kind) :: a2a_comp
|
||||
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: bstreams
|
||||
TYPE(cudaEvent), allocatable, dimension(:) :: bevents
|
||||
|
||||
!
|
||||
! These variables define the dimension of batches and subbatches in
|
||||
! * the 1D+2D GPU implementation:
|
||||
INTEGER :: batchsize = 16 ! how many ffts to batch together
|
||||
INTEGER :: subbatchsize = 4 ! size of subbatch for pipelining
|
||||
|
||||
! * the 1D+1D+1D implementation:
|
||||
INTEGER :: nstream_many = 16 ! this should be replaced by batchsize
|
||||
! since it has the same meaning.
|
||||
!
|
||||
#if defined(__IPC)
|
||||
INTEGER :: IPC_PEER(16) ! This is used for IPC that is not implemented yet.
|
||||
#endif
|
||||
INTEGER, ALLOCATABLE :: srh(:,:) ! Isend/recv handles by subbatch
|
||||
INTEGER, ALLOCATABLE :: srh(:,:) ! These are non blocking send/recv handles that are used to
|
||||
! overlap computation and communication of FFTs subbatches.
|
||||
#endif
|
||||
COMPLEX(DP), ALLOCATABLE, DIMENSION(:) :: aux
|
||||
#if defined(__FFT_OPENMP_TASKS)
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit e5392b772497f8597f6c3b2851e0c17da756bb64
|
||||
Subproject commit a9f7a1b01ab10e00cae22a5dca4f73ebf7e4917d
|
|
@ -1 +1 @@
|
|||
Subproject commit 6fef49bcfc4a380432f15734ed0ca1f0b0388977
|
||||
Subproject commit 819745f5849de5c9de516be133ab206691738257
|
Loading…
Reference in New Issue