Some more comments

This commit is contained in:
Pietro Bonfa 2020-11-23 22:04:56 +01:00
parent 076280880c
commit 38cb7372f6
4 changed files with 18 additions and 26 deletions

View File

@ -98,8 +98,6 @@
#endif
IF (isign < 0) THEN
!print *,"exec cufft FWD",nz,ldz,nsl
!call flush(6)
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_FORWARD )
tscale = 1.0_DP / nz
IF (is_inplace) THEN
@ -114,12 +112,10 @@
END DO
END IF
ELSE IF (isign > 0) THEN
!print *,"exec cufft INV",nz,ldz,nsl
!call flush(6)
IF (is_inplace) THEN
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_INVERSE ) !CUFFT_FORWARD )
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), c_d(1), CUFFT_INVERSE )
ELSE
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), cout_d(1), CUFFT_INVERSE ) !CUFFT_FORWARD )
istat = cufftExecZ2Z( cufft_planz( ip), c_d(1), cout_d(1), CUFFT_INVERSE )
END IF
END IF
@ -163,10 +159,6 @@
DATA_DIM, STRIDE, DIST, &
CUFFT_Z2Z, BATCH )
#if defined(__CUDA_DEBUG)
print *,"INIT CUFFT Z PLAN: ",nz,"x",nsl,"x",ldz
#endif
#ifdef TRACK_FLOPS
zflops( icurrent ) = 5.0d0 * REAL( nz ) * log( REAL( nz ) )/log( 2.d0 )
#endif
@ -428,10 +420,6 @@
DATA_DIM, STRIDE, DIST, &
CUFFT_Z2Z, BATCH )
#if defined(__CUDA_DEBUG)
print *,"INIT CUFFT ALL_XY PLAN: ",nx,"x",ny,"x",nzl,"ldx:",ldx,"batch:",batch_1,batch_2
#endif
#else
INTEGER, PARAMETER :: RANK=1
INTEGER :: FFT_DIM_X(RANK), DATA_DIM_X(RANK), FFT_DIM_Y(RANK), DATA_DIM_Y(RANK)
@ -455,9 +443,6 @@
IF( cufft_plan_y( 1, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(1,icurrent) )
IF( cufft_plan_y( 2, icurrent) /= 0 ) istat = cufftDestroy( cufft_plan_y(2,icurrent) )
#if defined(__CUDA_DEBUG)
print *,"INIT CUFFT XY PLAN: ",nx,"x",ny,"x",nzl,"ldx:",ldx,"batch:",batch_1,batch_2
#endif
istat = cufftPlanMany( cufft_plan_x( icurrent), RANK, FFT_DIM_X, &
DATA_DIM_X, STRIDE_X, DIST_X, &
@ -567,11 +552,9 @@
DO i=1, ldx*ldy*ldz*howmany
f_d( i ) = f_d( i ) * tscale
END DO
! call ZDSCAL( nx * ny * nz, tscale, f_d(1), 1)
ELSE IF( isign > 0 ) THEN
! call FFTW_INPLACE_DRV_3D( bw_plan(ip), 1, f_d(1), 1, 1 )
istat = cufftExecZ2Z( cufft_plan_3d(ip), f_d(1), f_d(1), CUFFT_INVERSE )
END IF
@ -642,6 +625,9 @@
! This routine is implemented only for fftw, essl, acml
! If not implemented, cfft3d is called instead
!
! NB: this version is much slower than a single 3D FFT of the
! entire data.
!
!----------------------------------------------------------------------
!
implicit none

View File

@ -153,21 +153,27 @@ MODULE fft_types
INTEGER :: grid_id
#if defined(__CUDA)
! These CUDA streams are used in the 1D+1D+1D GPU implementation
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_scatter_yz
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: stream_many
INTEGER :: nstream_many = 16
! These CUDA streams (and events) are used in the 1D+2D GPU implementation
INTEGER(kind=cuda_stream_kind) :: a2a_comp
INTEGER(kind=cuda_stream_kind), allocatable, dimension(:) :: bstreams
TYPE(cudaEvent), allocatable, dimension(:) :: bevents
!
! These variables define the dimension of batches and subbatches in
! * the 1D+2D GPU implementation:
INTEGER :: batchsize = 16 ! how many ffts to batch together
INTEGER :: subbatchsize = 4 ! size of subbatch for pipelining
! * the 1D+1D+1D implementation:
INTEGER :: nstream_many = 16 ! this should be replaced by batchsize
! since it has the same meaning.
!
#if defined(__IPC)
INTEGER :: IPC_PEER(16) ! This is used for IPC, which is not implemented yet.
#endif
INTEGER, ALLOCATABLE :: srh(:,:) ! Isend/recv handles by subbatch
INTEGER, ALLOCATABLE :: srh(:,:) ! These are non-blocking send/recv handles that are used to
! overlap computation and communication of FFT subbatches.
#endif
COMPLEX(DP), ALLOCATABLE, DIMENSION(:) :: aux
#if defined(__FFT_OPENMP_TASKS)

2
external/devxlib vendored

@ -1 +1 @@
Subproject commit e5392b772497f8597f6c3b2851e0c17da756bb64
Subproject commit a9f7a1b01ab10e00cae22a5dca4f73ebf7e4917d

2
external/fox vendored

@ -1 +1 @@
Subproject commit 6fef49bcfc4a380432f15734ed0ca1f0b0388977
Subproject commit 819745f5849de5c9de516be133ab206691738257