more chunked omp parallel do loops

This commit is contained in:
Stefano de Gironcoli 2018-08-06 03:34:51 +02:00
parent 86920021c8
commit 56197ff9ca
2 changed files with 68 additions and 73 deletions

View File

@ -87,9 +87,13 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, &
LOGICAL :: force_repmat ! = .TRUE. to force replication of the Gram matrices for Cholesky
! Needed if the sizes of these matrices become too small after locking
REAL :: res_array(maxter)
REAL :: res_array(maxter)
res_array = 0.0
INTEGER, PARAMETER :: blocksz = 256 ! used to optimize some omp parallel do loops
INTEGER :: nblock
nblock = (npw -1) /blocksz + 1 ! used to optimize some omp parallel do loops
res_array = 0.0
!
CALL start_clock( 'ppcg_gamma' )
!
@ -184,10 +188,12 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, &
!
! ... apply the diagonal preconditioner
!
!$omp parallel do
DO j = 1, nact
w(1:npw, act_idx(j)) = w(1:npw,act_idx(j)) / precondition(1:npw)
END DO
!$omp parallel do collapse(2)
DO j = 1, nact ; DO i=1,nblock
w(1+(i-1)*blocksz:MIN(i*blocksz,npw), act_idx(j)) = &
w(1+(i-1)*blocksz:MIN(i*blocksz,npw), act_idx(j)) / &
precondition(1+(i-1)*blocksz:MIN(i*blocksz,npw))
END DO ; END DO
!$omp end parallel do
!
call start_clock('ppcg:dgemm')
@ -567,19 +573,17 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, &
! residuals for individual eigenpairs in psi and e
if (overlap) then
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i = 1, npw
w( i, j ) = hpsi( i, j ) - spsi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) &
- spsi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j )*e( j )
END DO ; END DO
!$omp end parallel do
else
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i = 1, npw
w( i, j ) = hpsi( i, j ) - psi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) &
- psi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j )*e( j )
END DO ; END DO
!$omp end parallel do
end if
!
@ -732,19 +736,17 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, &
! ... Compute residuals
if (overlap) then
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i = 1, npw
w( i, j ) = hpsi( i, j ) - spsi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) &
- spsi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j )*e( j )
END DO ; END DO
!$omp end parallel do
else
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i = 1, npw
w( i, j ) = hpsi( i, j ) - psi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j ) &
- psi( 1+(i-1)*blocksz:MIN(i*blocksz,npw), j )*e( j )
END DO ; END DO
!$omp end parallel do
end if
!
@ -1654,19 +1656,15 @@ nguard = 0 ! 24 ! 50
IF (present(act_idx) ) THEN
!$omp parallel do collapse(2)
DO i=1, nact
DO j=1, nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), act_idx( i ) )
ENDDO
ENDDO
DO i=1, nact ; DO j=1,nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), act_idx( i ) )
ENDDO ; ENDDO
!$omp end parallel do
ELSE
!$omp parallel do collapse(2)
DO i=1, nact
DO j=1, nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i )
ENDDO
ENDDO
DO i=1, nact ; DO j=1,nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i )
ENDDO ; ENDDO
!$omp end parallel do
END IF
!

View File

@ -90,7 +90,10 @@ SUBROUTINE ppcg_k( h_psi, s_psi, overlap, precondition, &
! Needed if the sizes of these matrices become too small after locking
REAL :: res_array(maxter)
res_array = 0.0
INTEGER, PARAMETER :: blocksz = 256 ! used to optimize some omp parallel do loops
INTEGER :: nblock
res_array = 0.0
!
CALL start_clock( 'ppcg_k' )
!
@ -187,12 +190,13 @@ SUBROUTINE ppcg_k( h_psi, s_psi, overlap, precondition, &
!
! ... apply the diagonal preconditioner
!
!$omp parallel do collapse(2)
DO j = 1, nact
do ipol=0,npol-1
w(1+npwx*ipol:npw+npwx*ipol, act_idx(j)) = w(1+npwx*ipol:npw+npwx*ipol,act_idx(j)) / precondition(1:npw)
end do
END DO
nblock = (npw-1) / blocksz +1 ! used to optimize some omp parallel do loops
!$omp parallel do collapse(3)
DO j = 1, nact ; DO ipol=0,npol-1 ; DO i=1,nblock
w(1+(i-1)*blocksz+npwx*ipol:MIN(i*blocksz,npw)+npwx*ipol, act_idx(j)) = &
w(1+(i-1)*blocksz+npwx*ipol:MIN(i*blocksz,npw)+npwx*ipol, act_idx(j)) / &
precondition(1+(i-1)*blocksz:MIN(i*blocksz,npw))
END DO ; END DO ; END DO
!$omp end parallel do
!
call start_clock('ppcg:zgemm')
@ -555,21 +559,20 @@ SUBROUTINE ppcg_k( h_psi, s_psi, overlap, precondition, &
!
! ... Compute the new residual vector block by evaluating
! residuals for individual eigenpairs in psi and e
nblock = (kdim-1) / blocksz + 1 ! used to optimize some omp parallel do loops
if (overlap) then
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i = 1, kdim
w( i ,j ) = hpsi( i, j ) - spsi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,kdim) ,j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j ) &
- spsi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j )*e( j )
END DO ; END DO
!$omp end parallel do
else
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i = 1, kdim
w( i, j ) = hpsi( i, j ) - psi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,kdim) ,j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j ) &
- psi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j )*e( j )
END DO ; END DO
!$omp end parallel do
end if
!
@ -721,19 +724,17 @@ SUBROUTINE ppcg_k( h_psi, s_psi, overlap, precondition, &
! ... Compute residuals
if (overlap) then
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i =1, kdim
w( i, j ) = hpsi( i, j ) - spsi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j ) &
- spsi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j )*e( j )
END DO ; END DO
!$omp end parallel do
else
!$omp parallel do collapse(2)
DO j = 1, nbnd
DO i =1, kdim
w( i, j ) = hpsi( i, j ) - psi( i, j )*e( j )
END DO
END DO
DO j = 1, nbnd ; DO i=1,nblock
w( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j ) = hpsi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j ) &
- psi( 1+(i-1)*blocksz:MIN(i*blocksz,kdim), j )*e( j )
END DO ; END DO
!$omp end parallel do
end if
!
@ -1630,19 +1631,15 @@ nguard = 0 ! 24 ! 50
IF (present(act_idx) ) THEN
!$omp parallel do collapse(2)
DO i=1, nact
DO j=1, nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), act_idx( i ) )
ENDDO
ENDDO
DO i=1, nact ; DO j=1,nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), act_idx( i ) )
ENDDO ; ENDDO
!$omp end parallel do
ELSE
!$omp parallel do collapse(2)
DO i=1, nact
DO j=1, nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i )
ENDDO
ENDDO
DO i=1, nact ; DO j=1,nblock
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i )
ENDDO ; ENDDO
!$omp end parallel do
END IF
!