mirror of https://gitlab.com/QEF/q-e.git
loop3, loop2, loop4 ported
This commit is contained in:
parent
5af32fb3e5
commit
95ee7dafbc
|
@ -154,23 +154,17 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
! compute the gradient. can reuse information from previous step
|
! compute the gradient. can reuse information from previous step
|
||||||
!
|
!
|
||||||
if (iter == 1) then
|
if (iter == 1) then
|
||||||
!$acc data present(e,g)
|
|
||||||
call ch_psi (ndim, dpsi(1,n_start), g, e(n_start), ik, my_nbnd)
|
call ch_psi (ndim, dpsi(1,n_start), g, e(n_start), ik, my_nbnd)
|
||||||
!$acc end data
|
|
||||||
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
||||||
!$acc data present(g,d0psi)
|
|
||||||
!$acc host_data use_device(d0psi,g)
|
!$acc host_data use_device(d0psi,g)
|
||||||
call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd_), 1)
|
call zaxpy (ndim, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
enddo
|
enddo
|
||||||
IF (npol==2) THEN
|
IF (npol==2) THEN
|
||||||
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
||||||
!$acc data present(g,d0psi)
|
|
||||||
!$acc host_data use_device(d0psi,g)
|
!$acc host_data use_device(d0psi,g)
|
||||||
call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, g(ndmx+1,ibnd_), 1)
|
call zaxpy (ndim, (-1.d0,0.d0), d0psi(ndmx+1,ibnd), 1, g(ndmx+1,ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
enddo
|
enddo
|
||||||
END IF
|
END IF
|
||||||
endif
|
endif
|
||||||
|
@ -182,19 +176,15 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
||||||
if (conv (ibnd) .eq.0) then
|
if (conv (ibnd) .eq.0) then
|
||||||
lbnd = lbnd+1
|
lbnd = lbnd+1
|
||||||
!$acc data present(g,h)
|
|
||||||
!$acc host_data use_device(g,h)
|
!$acc host_data use_device(g,h)
|
||||||
call zcopy (ndmx*npol, g (1, ibnd_), 1, h (1, ibnd_), 1)
|
call zcopy (ndmx*npol, g (1, ibnd_), 1, h (1, ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
call cg_psi(ndmx, ndim, 1, h(1,ibnd_), h_diag(1,ibnd) )
|
call cg_psi(ndmx, ndim, 1, h(1,ibnd_), h_diag(1,ibnd) )
|
||||||
|
|
||||||
IF (gamma_only) THEN
|
IF (gamma_only) THEN
|
||||||
!$acc data present(g,h)
|
|
||||||
!$acc host_data use_device(g,h)
|
!$acc host_data use_device(g,h)
|
||||||
ddotval=2.0d0*myddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1)
|
ddotval=2.0d0*myddot(2*ndmx*npol,h(1,ibnd_),1,g(1,ibnd_),1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
rho(lbnd) = ddotval
|
rho(lbnd) = ddotval
|
||||||
IF(gstart==2) THEN
|
IF(gstart==2) THEN
|
||||||
!$acc kernels present(h,g) copy(rho(1:my_nbnd))
|
!$acc kernels present(h,g) copy(rho(1:my_nbnd))
|
||||||
|
@ -202,11 +192,9 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
!$acc end kernels
|
!$acc end kernels
|
||||||
ENDIF
|
ENDIF
|
||||||
ELSE
|
ELSE
|
||||||
!$acc data present(g,h)
|
|
||||||
!$acc host_data use_device(g,h)
|
!$acc host_data use_device(g,h)
|
||||||
ddotval = myddot (2*ndmx*npol, h(1,ibnd_), 1, g(1,ibnd_), 1)
|
ddotval = myddot (2*ndmx*npol, h(1,ibnd_), 1, g(1,ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
rho(lbnd) = ddotval
|
rho(lbnd) = ddotval
|
||||||
ENDIF
|
ENDIF
|
||||||
endif
|
endif
|
||||||
|
@ -245,11 +233,9 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
!call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd_), 1)
|
!call dscal (2 * ndmx * npol, - 1.d0, h (1, ibnd_), 1)
|
||||||
if (iter.ne.1) then
|
if (iter.ne.1) then
|
||||||
dcgamma = rho (ibnd_) / rhoold (ibnd_)
|
dcgamma = rho (ibnd_) / rhoold (ibnd_)
|
||||||
!$acc data present(h,hold)
|
|
||||||
!$acc host_data use_device(hold,h)
|
!$acc host_data use_device(hold,h)
|
||||||
call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd_), 1, h (1, ibnd_), 1)
|
call zaxpy (ndmx*npol, dcgamma, hold (1, ibnd_), 1, h (1, ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
!
|
!
|
||||||
|
@ -257,11 +243,9 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
! it is later set to the current (becoming old) value of h
|
! it is later set to the current (becoming old) value of h
|
||||||
!
|
!
|
||||||
lbnd = lbnd+1
|
lbnd = lbnd+1
|
||||||
!$acc data present(h,hold)
|
|
||||||
!$acc host_data use_device(hold,h)
|
!$acc host_data use_device(hold,h)
|
||||||
call zcopy (ndmx*npol, h (1, ibnd_), 1, hold (1, lbnd), 1)
|
call zcopy (ndmx*npol, h (1, ibnd_), 1, hold (1, lbnd), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
!$acc kernels present(eu,e)
|
!$acc kernels present(eu,e)
|
||||||
eu (lbnd) = e (ibnd)
|
eu (lbnd) = e (ibnd)
|
||||||
!$acc end kernels
|
!$acc end kernels
|
||||||
|
@ -271,9 +255,7 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
!
|
!
|
||||||
! compute t = A*h
|
! compute t = A*h
|
||||||
!
|
!
|
||||||
!$acc data present(eu, hold, t)
|
|
||||||
call ch_psi (ndim, hold, t, eu, ik, lbnd)
|
call ch_psi (ndim, hold, t, eu, ik, lbnd)
|
||||||
!$acc end data
|
|
||||||
!
|
!
|
||||||
! compute the coefficients a and c for the line minimization
|
! compute the coefficients a and c for the line minimization
|
||||||
! compute step length lambda
|
! compute step length lambda
|
||||||
|
@ -312,36 +294,31 @@ subroutine cgsolve_all (ch_psi, cg_psi, e, d0psi, dpsi, h_diag, &
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
lbnd=0
|
lbnd=0
|
||||||
CALL start_clock('loop4')
|
CALL start_clock('loop4')
|
||||||
!$acc update host(a,c)
|
|
||||||
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
do ibnd = n_start, n_end ; ibnd_ = ibnd - n_start + 1
|
||||||
if (conv (ibnd) .eq.0) then
|
if (conv (ibnd) .eq.0) then
|
||||||
lbnd=lbnd+1
|
lbnd=lbnd+1
|
||||||
|
!$acc serial present(a,c) copyout(dclambda)
|
||||||
dclambda = CMPLX( - a(lbnd) / c(lbnd), 0.d0,kind=DP)
|
dclambda = CMPLX( - a(lbnd) / c(lbnd), 0.d0,kind=DP)
|
||||||
|
!$acc end serial
|
||||||
!
|
!
|
||||||
! move to new position
|
! move to new position
|
||||||
!
|
!
|
||||||
!$acc data present(dpsi,h)
|
|
||||||
!$acc host_data use_device(dpsi,h)
|
!$acc host_data use_device(dpsi,h)
|
||||||
call zaxpy (ndmx*npol, dclambda, h(1,ibnd_), 1, dpsi(1,ibnd), 1)
|
call zaxpy (ndmx*npol, dclambda, h(1,ibnd_), 1, dpsi(1,ibnd), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
!
|
!
|
||||||
! update to get the gradient
|
! update to get the gradient
|
||||||
!
|
!
|
||||||
!g=g+lam
|
!g=g+lam
|
||||||
!$acc data present(t,g)
|
|
||||||
!$acc host_data use_device(t,g)
|
!$acc host_data use_device(t,g)
|
||||||
call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd_), 1)
|
call zaxpy (ndmx*npol, dclambda, t(1,lbnd), 1, g(1,ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
!
|
!
|
||||||
! save current (now old) h and rho for later use
|
! save current (now old) h and rho for later use
|
||||||
!
|
!
|
||||||
!$acc data present(hold,h)
|
|
||||||
!$acc host_data use_device(h,hold)
|
!$acc host_data use_device(h,hold)
|
||||||
call zcopy (ndmx*npol, h(1,ibnd_), 1, hold(1,ibnd_), 1)
|
call zcopy (ndmx*npol, h(1,ibnd_), 1, hold(1,ibnd_), 1)
|
||||||
!$acc end host_data
|
!$acc end host_data
|
||||||
!$acc end data
|
|
||||||
rhoold (ibnd_) = rho (ibnd_)
|
rhoold (ibnd_) = rho (ibnd_)
|
||||||
endif
|
endif
|
||||||
enddo
|
enddo
|
||||||
|
|
|
@ -158,9 +158,10 @@ USE cublas
|
||||||
implicit none
|
implicit none
|
||||||
integer :: n, incx, incy
|
integer :: n, incx, incy
|
||||||
DOUBLE PRECISION, dimension(*) :: dx, dy
|
DOUBLE PRECISION, dimension(*) :: dx, dy
|
||||||
|
DOUBLE PRECISION :: result
|
||||||
#if defined(__CUDA)
|
#if defined(__CUDA)
|
||||||
attributes(device) :: dx, dy
|
attributes(device) :: dx, dy
|
||||||
DOUBLE PRECISION, device :: result
|
attributes(device) :: result
|
||||||
type(cublashandle) :: h
|
type(cublashandle) :: h
|
||||||
integer :: ierr
|
integer :: ierr
|
||||||
h = cublasGetHandle()
|
h = cublasGetHandle()
|
||||||
|
@ -171,3 +172,27 @@ USE cublas
|
||||||
|
|
||||||
return
|
return
|
||||||
end subroutine MYDDOTv3
|
end subroutine MYDDOTv3
|
||||||
|
|
||||||
|
subroutine MYDDOTv4 (n, dx, incx, dy, incy, result)
|
||||||
|
#if defined(__CUDA)
|
||||||
|
USE cudafor
|
||||||
|
USE cublas
|
||||||
|
#endif
|
||||||
|
implicit none
|
||||||
|
integer :: n, incx, incy
|
||||||
|
DOUBLE PRECISION dx(1,*), dy(1,*)
|
||||||
|
DOUBLE PRECISION :: result
|
||||||
|
!DOUBLE PRECISION dx(*,1), dy(*,1)
|
||||||
|
#if defined(__CUDA)
|
||||||
|
attributes(device) :: dx, dy
|
||||||
|
attributes(device) :: result
|
||||||
|
!DOUBLE PRECISION, dimension(1,1), device :: dz
|
||||||
|
!type(cublashandle) :: h
|
||||||
|
!integer :: ierr
|
||||||
|
!h = cublasGetHandle()
|
||||||
|
!ierr=cublasdgemm('T', 'N', 1, 1, n, 1.0d0, dx, n, dy, n, 0.0d0, dz, 1)
|
||||||
|
#else
|
||||||
|
result=DDOT(n, dx, incx, dy, incy)
|
||||||
|
#endif
|
||||||
|
end subroutine MYDDOTv4
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue