mirror of https://gitlab.com/QEF/q-e.git
more threaded_backassignement (including optionally summing another vector)
This commit is contained in:
parent
819ab53cc5
commit
64cca07a92
|
@ -531,20 +531,23 @@ SUBROUTINE ppcg_gamma( h_psi, s_psi, overlap, precondition, &
|
||||||
call start_clock('ppcg:dgemm')
|
call start_clock('ppcg:dgemm')
|
||||||
call threaded_assign( buffer1, psi, npwx, l, col_idx )
|
call threaded_assign( buffer1, psi, npwx, l, col_idx )
|
||||||
CALL DGEMM('N','N',npw2, l, l, ONE, buffer1, npwx2, coord_psi, sbsize, ZERO, buffer, npwx2)
|
CALL DGEMM('N','N',npw2, l, l, ONE, buffer1, npwx2, coord_psi, sbsize, ZERO, buffer, npwx2)
|
||||||
psi(:,col_idx(1:l)) = buffer(:,1:l) + p(:,col_idx(1:l))
|
! psi(:,col_idx(1:l)) = buffer(:,1:l) + p(:,col_idx(1:l))
|
||||||
|
call threaded_backassign( psi, act_idx, buffer, npwx, l, p )
|
||||||
call stop_clock('ppcg:dgemm')
|
call stop_clock('ppcg:dgemm')
|
||||||
!
|
!
|
||||||
call start_clock('ppcg:dgemm')
|
call start_clock('ppcg:dgemm')
|
||||||
call threaded_assign( buffer1, hpsi, npwx, l, col_idx )
|
call threaded_assign( buffer1, hpsi, npwx, l, col_idx )
|
||||||
CALL DGEMM('N','N',npw2, l, l, ONE, buffer1, npwx2, coord_psi, sbsize, ZERO, buffer, npwx2)
|
CALL DGEMM('N','N',npw2, l, l, ONE, buffer1, npwx2, coord_psi, sbsize, ZERO, buffer, npwx2)
|
||||||
hpsi(:,col_idx(1:l)) = buffer(:,1:l) + hp(:,col_idx(1:l))
|
! hpsi(:,col_idx(1:l)) = buffer(:,1:l) + hp(:,col_idx(1:l))
|
||||||
|
call threaded_backassign( hpsi, act_idx, buffer, npwx, l, hp )
|
||||||
call stop_clock('ppcg:dgemm')
|
call stop_clock('ppcg:dgemm')
|
||||||
!
|
!
|
||||||
if (overlap) then
|
if (overlap) then
|
||||||
call start_clock('ppcg:dgemm')
|
call start_clock('ppcg:dgemm')
|
||||||
call threaded_assign( buffer1, spsi, npwx, l, col_idx )
|
call threaded_assign( buffer1, spsi, npwx, l, col_idx )
|
||||||
CALL DGEMM('N','N',npw2, l, l, ONE, buffer1, npwx2, coord_psi, sbsize, ZERO, buffer, npwx2)
|
CALL DGEMM('N','N',npw2, l, l, ONE, buffer1, npwx2, coord_psi, sbsize, ZERO, buffer, npwx2)
|
||||||
spsi(:,col_idx(1:l)) = buffer(:,1:l) + sp(:,col_idx(1:l))
|
! spsi(:,col_idx(1:l)) = buffer(:,1:l) + sp(:,col_idx(1:l))
|
||||||
|
call threaded_backassign( spsi, act_idx, buffer, npwx, l, sp )
|
||||||
call stop_clock('ppcg:dgemm')
|
call stop_clock('ppcg:dgemm')
|
||||||
end if
|
end if
|
||||||
!
|
!
|
||||||
|
@ -1689,11 +1692,14 @@ nguard = 0 ! 24 ! 50
|
||||||
!
|
!
|
||||||
END SUBROUTINE threaded_assign
|
END SUBROUTINE threaded_assign
|
||||||
|
|
||||||
SUBROUTINE threaded_backassign(array_out, idx, array_in, kdimx, nact )
|
SUBROUTINE threaded_backassign(array_out, idx, array_in, kdimx, nact, a2_in )
|
||||||
!
|
!
|
||||||
! assign (copy) a complex array in a threaded way
|
! assign (copy) a complex array in a threaded way
|
||||||
!
|
!
|
||||||
! array_out( 1:kdimx, idx(1:nact) ) = array_in( 1:kdimx, 1:nact )
|
! array_out( 1:kdimx, idx(1:nact) ) = array_in( 1:kdimx, 1:nact ) or
|
||||||
|
!
|
||||||
|
! array_out( 1:kdimx, idx(1:nact) ) = array_in( 1:kdimx, 1:nact ) + a2_in( 1:kdimx, idx(1:nact) (
|
||||||
|
! if a2_in is present
|
||||||
!
|
!
|
||||||
! the index array idx is mandatory otherwise one could use previous routine)
|
! the index array idx is mandatory otherwise one could use previous routine)
|
||||||
!
|
!
|
||||||
|
@ -1703,6 +1709,7 @@ nguard = 0 ! 24 ! 50
|
||||||
!
|
!
|
||||||
COMPLEX(DP), INTENT(INOUT) :: array_out( kdimx, * ) ! we don't want to mess with un referenced columns
|
COMPLEX(DP), INTENT(INOUT) :: array_out( kdimx, * ) ! we don't want to mess with un referenced columns
|
||||||
COMPLEX(DP), INTENT(IN) :: array_in ( kdimx, nact )
|
COMPLEX(DP), INTENT(IN) :: array_in ( kdimx, nact )
|
||||||
|
COMPLEX(DP), INTENT(IN), OPTIONAL :: a2_in ( kdimx, * )
|
||||||
INTEGER, INTENT(IN) :: kdimx, nact
|
INTEGER, INTENT(IN) :: kdimx, nact
|
||||||
INTEGER, INTENT(IN) :: idx( * )
|
INTEGER, INTENT(IN) :: idx( * )
|
||||||
!
|
!
|
||||||
|
@ -1716,11 +1723,21 @@ nguard = 0 ! 24 ! 50
|
||||||
|
|
||||||
nblock = (kdimx - 1)/blocksz + 1
|
nblock = (kdimx - 1)/blocksz + 1
|
||||||
|
|
||||||
!$omp parallel do collapse(2)
|
IF ( present(a2_in) ) THEN
|
||||||
DO i=1, nact ; DO j=1,nblock
|
!$omp parallel do collapse(2)
|
||||||
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), idx( i ) ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i )
|
DO i=1, nact ; DO j=1,nblock
|
||||||
ENDDO ; ENDDO
|
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), idx( i ) ) = &
|
||||||
!$omp end parallel do
|
array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i ) + &
|
||||||
|
a2_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), idx( i ) )
|
||||||
|
ENDDO ; ENDDO
|
||||||
|
!$omp end parallel do
|
||||||
|
ELSE
|
||||||
|
!$omp parallel do collapse(2)
|
||||||
|
DO i=1, nact ; DO j=1,nblock
|
||||||
|
array_out(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), idx( i ) ) = array_in(1+(j-1)*blocksz:MIN(j*blocksz,kdimx), i )
|
||||||
|
ENDDO ; ENDDO
|
||||||
|
!$omp end parallel do
|
||||||
|
END IF
|
||||||
!
|
!
|
||||||
END SUBROUTINE threaded_backassign
|
END SUBROUTINE threaded_backassign
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue