Merge branch 'opt-init-rebase' into 'master'

Optimize initialization

See merge request QEF/q-e!6
This commit is contained in:
giannozz 2018-01-28 11:04:28 +00:00
commit e31aa94fe1
3 changed files with 202 additions and 127 deletions

View File

@ -56,22 +56,22 @@ CONTAINS
!
! here a few local variables
!
REAL(DP) :: t (3), tt
INTEGER :: ngm_save, n1, n2, n3, ngm_offset, ngm_max
REAL(DP) :: tx(3), ty(3), t(3)
REAL(DP), ALLOCATABLE :: tt(:)
INTEGER :: ngm_save, n1, n2, n3, ngm_offset, ngm_max, ngm_local
!
REAL(DP), ALLOCATABLE :: g2sort_g(:)
! array containing all g vectors, on all processors: replicated data
! when no_global_sort is present (and it is true) only g vectors for
! the current processor are stored
INTEGER, ALLOCATABLE :: mill_g(:,:), mill_unsorted(:,:)
! array containing only g vectors for the current processor
INTEGER, ALLOCATABLE :: mill_unsorted(:,:)
! array containing all g vectors generators, on all processors
! (replicated data). When no_global_sort is present and .true.,
! only g-vectors for the current processor are stored
INTEGER, ALLOCATABLE :: igsrt(:)
INTEGER, ALLOCATABLE :: igsrt(:), g2l(:)
!
INTEGER :: ni, nj, nk, i, j, k, ipol, ng, igl, indsw
INTEGER :: istart, jstart, kstart
INTEGER :: mype, npe
LOGICAL :: global_sort
LOGICAL :: global_sort, is_local
INTEGER, ALLOCATABLE :: ngmpe(:)
!
global_sort = .TRUE.
@ -90,6 +90,7 @@ CONTAINS
ngm_save = ngm
!
ngm = 0
ngm_local = 0
!
! set the total number of fft mesh points and and initial value of gg
! The choice of gcutm is due to the fact that we have to order the
@ -99,54 +100,98 @@ CONTAINS
!
! and computes all the g vectors inside a sphere
!
ALLOCATE( mill_g( 3, ngm_max ),mill_unsorted( 3, ngm_max ) )
ALLOCATE( mill_unsorted( 3, ngm_save ) )
ALLOCATE( igsrt( ngm_max ) )
ALLOCATE( g2l( ngm_max ) )
ALLOCATE( g2sort_g( ngm_max ) )
!
g2sort_g(:) = 1.0d20
!
! allocate temporal array
!
ALLOCATE( tt( dfftp%nr3 ) )
!
! max miller indices (same convention as in module stick_set)
!
ni = (dfftp%nr1-1)/2
nj = (dfftp%nr2-1)/2
nk = (dfftp%nr3-1)/2
!
iloop: DO i = -ni, ni
! gamma-only: exclude space with x < 0
!
IF ( gamma_only ) THEN
istart = 0
ELSE
istart = -ni
ENDIF
!
iloop: DO i = istart, ni
!
! gamma-only: exclude space with x < 0
! gamma-only: exclude plane with x = 0, y < 0
!
IF ( gamma_only .and. i < 0) CYCLE iloop
jloop: DO j = -nj, nj
IF ( gamma_only .and. i == 0 ) THEN
jstart = 0
ELSE
jstart = -nj
ENDIF
!
tx(1:3) = i * bg(1:3,1)
!
jloop: DO j = jstart, nj
!
! gamma-only: exclude plane with x = 0, y < 0
!
IF ( gamma_only .and. i == 0 .and. j < 0) CYCLE jloop
IF( .NOT. global_sort ) THEN
IF ( fft_stick_index( dfftp, i, j ) == 0) CYCLE jloop
IF ( .NOT. global_sort ) THEN
IF ( fft_stick_index( dfftp, i, j ) == 0 ) CYCLE jloop
is_local = .TRUE.
ELSE
IF ( dfftp%lpara .AND. fft_stick_index( dfftp, i, j ) == 0) THEN
is_local = .FALSE.
ELSE
is_local = .TRUE.
END IF
END IF
kloop: DO k = -nk, nk
!
! gamma-only: exclude line with x = 0, y = 0, z < 0
!
IF ( gamma_only .and. i == 0 .and. j == 0 ) THEN
kstart = 0
ELSE
kstart = -nk
ENDIF
!
ty(1:3) = tx(1:3) + j * bg(1:3,2)
!
! compute all the norm square
!
DO k = kstart, nk
!
! gamma-only: exclude line with x = 0, y = 0, z < 0
!
IF ( gamma_only .and. i == 0 .and. j == 0 .and. k < 0) CYCLE kloop
t(:) = i * bg (:,1) + j * bg (:,2) + k * bg (:,3)
tt = t(1)**2+t(2)**2+t(3)**2
IF (tt <= gcutm) THEN
t(1) = ty(1) + k * bg(1,3)
t(2) = ty(2) + k * bg(2,3)
t(3) = ty(3) + k * bg(3,3)
tt(k-kstart+1) = t(1)**2 + t(2)**2 + t(3)**2
ENDDO
!
! save all the norm square within cutoff
!
DO k = kstart, nk
IF (tt(k-kstart+1) <= gcutm) THEN
ngm = ngm + 1
IF (ngm > ngm_max) CALL errore ('ggen 1', 'too many g-vectors', ngm)
mill_unsorted( :, ngm ) = (/ i,j,k /)
IF ( tt > eps8 ) THEN
g2sort_g(ngm) = tt
IF ( tt(k-kstart+1) > eps8 ) THEN
g2sort_g(ngm) = tt(k-kstart+1)
ELSE
g2sort_g(ngm) = 0.d0
ENDIF
IF (is_local) THEN
ngm_local = ngm_local + 1
mill_unsorted( :, ngm_local ) = (/ i,j,k /)
g2l(ngm) = ngm_local
ELSE
g2l(ngm) = 0
ENDIF
ENDIF
ENDDO kloop
ENDDO
ENDDO jloop
ENDDO iloop
IF (ngm /= ngm_max) &
CALL errore ('ggen', 'g-vectors missing !', abs(ngm - ngm_max))
!
@ -156,10 +201,7 @@ CONTAINS
ELSE
CALL hpsort_eps( ngm_g, g2sort_g, igsrt, eps8 )
END IF
mill_g(1,:) = mill_unsorted(1,igsrt(:))
mill_g(2,:) = mill_unsorted(2,igsrt(:))
mill_g(3,:) = mill_unsorted(3,igsrt(:))
DEALLOCATE( g2sort_g, igsrt, mill_unsorted )
DEALLOCATE( g2sort_g, tt )
IF( .NOT. global_sort ) THEN
!
@ -183,33 +225,31 @@ CONTAINS
ngm = 0
!
ngloop: DO ng = 1, ngm_max
i = mill_g(1, ng)
j = mill_g(2, ng)
k = mill_g(3, ng)
IF( dfftp%lpara .AND. global_sort ) THEN
IF ( fft_stick_index( dfftp, i, j ) == 0) CYCLE ngloop
END IF
ngm = ngm + 1
! Here map local and global g index !!! N.B: :
! the global G vectors arrangement depends on the number of processors
!
IF( .NOT. global_sort ) THEN
ig_l2g( ngm ) = ng + ngm_offset
ELSE
ig_l2g( ngm ) = ng
END IF
g (1:3, ngm) = i * bg (:, 1) + j * bg (:, 2) + k * bg (:, 3)
gg (ngm) = sum(g (1:3, ngm)**2)
IF (g2l(igsrt(ng))>0) THEN
! fetch the indices
i = mill_unsorted(1, g2l(igsrt(ng)))
j = mill_unsorted(2, g2l(igsrt(ng)))
k = mill_unsorted(3, g2l(igsrt(ng)))
!
ngm = ngm + 1
!
! Here map local and global g index !!! N.B: :
! the global G vectors arrangement depends on the number of processors
!
IF( .NOT. global_sort ) THEN
ig_l2g( ngm ) = ng + ngm_offset
ELSE
ig_l2g( ngm ) = ng
END IF
g(1:3, ngm) = i * bg (:, 1) + j * bg (:, 2) + k * bg (:, 3)
gg(ngm) = sum(g(1:3, ngm)**2)
ENDIF
ENDDO ngloop
DEALLOCATE( mill_g )
DEALLOCATE( igsrt, g2l )
IF (ngm /= ngm_save) &
CALL errore ('ggen', 'g-vectors (ngm) missing !', abs(ngm - ngm_save))
!

View File

@ -7,6 +7,64 @@
!
!
!--------------------------------------------------------------------------
SUBROUTINE compute_distances_SoA(pos, N, RSoA, Distances)
!--------------------------------------------------------------------------
!
! This routine computes the distance between pos and all the centers in RSoA
! All the positions are in crystal coordinates
!
USE kinds, ONLY : dp
USE cell_base, ONLY : at
!
IMPLICIT NONE
!
REAL(DP),INTENT(in) :: pos(3) ! reference position
INTEGER,INTENT(in) :: N ! number of elements in RSoA
REAL(DP),INTENT(in) :: RSoA(N,3) ! positions in SoA layout
REAL(DP),INTENT(out) :: Distances(N) ! minimal distances
!
REAL(DP) :: corners(3,8)
REAL(DP) :: dx, dy, dz, dx_c, dy_c, dz_c
REAL(DP) :: dist, dist_min
INTEGER :: ix, iy, iz, ic, iat
ic = 0
DO ix = 0,1
dx = DBLE(-ix)
DO iy = 0,1
dy = DBLE(-iy)
DO iz = 0,1
dz = DBLE(-iz)
ic = ic + 1
corners(1,ic) = dx*at(1,1) + dy*at(1,2) + dz*at(1,3)
corners(2,ic) = dx*at(2,1) + dy*at(2,2) + dz*at(2,3)
corners(3,ic) = dx*at(3,1) + dy*at(3,2) + dz*at(3,3)
ENDDO
ENDDO
ENDDO
DO iat = 1,N
dx = RSoA(iat,1) - pos(1)
dx = dx - FLOOR(dx)
dy = RSoA(iat,2) - pos(2)
dy = dy - FLOOR(dy)
dz = RSoA(iat,3) - pos(3)
dz = dz - FLOOR(dz)
dx_c = dx*at(1,1) + dy*at(1,2) + dz*at(1,3)
dy_c = dx*at(2,1) + dy*at(2,2) + dz*at(2,3)
dz_c = dx*at(3,1) + dy*at(3,2) + dz*at(3,3)
dist_min = dx_c*dx_c + dy_c*dy_c + dz_c*dz_c;
DO ic = 2,8
dx = dx_c + corners(1,ic)
dy = dy_c + corners(2,ic)
dz = dz_c + corners(3,ic)
dist = dx*dx + dy*dy + dz*dz
IF (dist<dist_min) dist_min = dist
ENDDO
Distances(iat) = SQRT(dist_min)
ENDDO
END SUBROUTINE compute_distances_SoA
SUBROUTINE make_pointlists
!--------------------------------------------------------------------------
!
@ -35,11 +93,11 @@ SUBROUTINE make_pointlists
INTEGER idx,indproc,iat,ir,iat1
INTEGER i,j,k,i0,j0,k0,jj0,kk0,ipol,nt,nt1
REAL(DP) :: posi(3), distance
REAL(DP), ALLOCATABLE :: tau0(:,:), distmin(:)
REAL(DP) :: posi(3), WS_radius, dist
REAL(DP), ALLOCATABLE :: tau0(:,:), tau_SoA(:,:), distmin(:), distances(:)
WRITE( stdout,'(5x,"Generating pointlists ...")')
ALLOCATE(tau0(3,nat))
ALLOCATE(tau0(3,nat), tau_SoA(nat,3), distances(nat))
ALLOCATE( distmin(ntyp) )
! Bring all the atomic positions on the first unit cell
@ -48,43 +106,35 @@ SUBROUTINE make_pointlists
CALL cryst_to_cart(nat,tau0,bg,-1)
DO iat=1,nat
DO ipol=1,3
tau0(ipol,iat)=tau0(ipol,iat)-NINT(tau0(ipol,iat))
tau_SoA(iat,ipol)=tau0(ipol,iat)
ENDDO
ENDDO
CALL cryst_to_cart(nat,tau0,at,1)
! Check the minimum distance between two atoms in the system
WS_radius = 1.d100
DO i = -1,1
DO j = -1,1
DO k = -1,1
IF ( i==0 .AND. j==0 .AND. k==0 ) CYCLE
dist = ( DBLE(i)*at(1,1) + DBLE(j)*at(1,2) + DBLE(k)*at(1,3) )**2 &
+ ( DBLE(i)*at(2,1) + DBLE(j)*at(2,2) + DBLE(k)*at(2,3) )**2 &
+ ( DBLE(i)*at(3,1) + DBLE(j)*at(3,2) + DBLE(k)*at(3,3) )**2
IF (dist<WS_radius) WS_radius = dist
ENDDO
ENDDO
ENDDO
WS_radius = SQRT(WS_radius)
distmin(:) = 1.d0
distmin(:) = WS_radius
DO iat = 1,nat
nt = ityp(iat)
call compute_distances_SoA(tau0(1:3,iat), nat, tau_SoA, distances)
DO iat1 = 1,nat
IF (iat.eq.iat1) CYCLE
nt1 = ityp(iat1)
! posi is the position of a second atom
DO i = -1,1
DO j = -1,1
DO k = -1,1
distance = 0.d0
DO ipol = 1,3
posi(ipol) = tau0(ipol,iat1) + DBLE(i)*at(ipol,1) &
+ DBLE(j)*at(ipol,2) &
+ DBLE(k)*at(ipol,3)
distance = distance + (posi(ipol)-tau0(ipol,iat))**2
ENDDO
distance = SQRT(distance)
IF ((distance.LT.distmin(nt)).AND.(distance.GT.1.d-8)) &
& distmin(nt) = distance
IF ((distance.LT.distmin(nt1)).AND.(distance.GT.1.d-8)) &
& distmin(nt1) = distance
ENDDO ! k
ENDDO ! j
ENDDO ! i
IF (distances(iat1).LT.distmin(nt)) distmin(nt) = distances(iat1)
IF (distances(iat1).LT.distmin(nt1)) distmin(nt1) = distances(iat1)
ENDDO ! iat1
ENDDO ! iat
@ -111,6 +161,9 @@ SUBROUTINE make_pointlists
factlist(:) = 0.d0
jj0 = dfftp%my_i0r2p ; kk0 = dfftp%my_i0r3p
DO ir = 1, dfftp%nr1x*dfftp%my_nr2p*dfftp%my_nr3p
! ... check result vector boundary
IF( ir .GT. SIZE( factlist ) .OR. ir .GT. SIZE( pointlist )) &
CALL errore( ' make_pointlists ', ' inconsistent sizes ', 1 )
!
! ... three dimensional indexes
!
@ -124,45 +177,27 @@ SUBROUTINE make_pointlists
i0 = idx
! ... do not include points outside the physical range
IF ( i0 >= dfftp%nr1 .OR. j0 >= dfftp%nr2 .OR. k0 >= dfftp%nr3 ) CYCLE
DO i = i0-dfftp%nr1,i0+dfftp%nr1, dfftp%nr1
DO j = j0-dfftp%nr2, j0+dfftp%nr2, dfftp%nr2
DO k = k0-dfftp%nr3, k0+dfftp%nr3, dfftp%nr3
DO ipol=1,3
posi(ipol) = DBLE(i)/DBLE(dfftp%nr1) * at(ipol,1) &
+ DBLE(j)/DBLE(dfftp%nr2) * at(ipol,2) &
+ DBLE(k)/DBLE(dfftp%nr3) * at(ipol,3)
ENDDO
posi(1) = DBLE(i0)/DBLE(dfftp%nr1)
posi(2) = DBLE(j0)/DBLE(dfftp%nr2)
posi(3) = DBLE(k0)/DBLE(dfftp%nr3)
call compute_distances_SoA(posi, nat, tau_SoA, distances)
DO iat = 1,nat
nt=ityp(iat)
distance = SQRT( (posi(1)-tau0(1,iat))**2 + &
(posi(2)-tau0(2,iat))**2 + &
(posi(3)-tau0(3,iat))**2)
IF (distance.LE.r_m(nt)) THEN
IF( ir .GT. SIZE( factlist ) .OR. ir .GT. SIZE( pointlist )) &
CALL errore( ' make_pointlists ', ' inconsistent sizes ', 1 )
factlist(ir) = 1.d0
pointlist(ir) = iat
GO TO 10
ELSE IF (distance.LE.1.2*r_m(nt)) THEN
IF( ir .GT. SIZE( factlist ) .OR. ir .GT. SIZE( pointlist )) &
CALL errore( ' make_pointlists ', ' inconsistent sizes ', 1 )
factlist(ir) = 1.d0 - (distance -r_m(nt))/(0.2d0*r_m(nt))
pointlist(ir) = iat
GO TO 10
ENDIF
ENDDO
ENDDO ! k
ENDDO ! j
ENDDO ! i
10 CONTINUE
DO iat = 1,nat
nt=ityp(iat)
IF (distances(iat).LE.r_m(nt)) THEN
factlist(ir) = 1.d0
pointlist(ir) = iat
EXIT
ELSE IF (distances(iat).LE.1.2*r_m(nt)) THEN
factlist(ir) = 1.d0 - (distances(iat) -r_m(nt))/(0.2d0*r_m(nt))
pointlist(ir) = iat
EXIT
ENDIF
ENDDO
ENDDO ! ir
DEALLOCATE(tau0)
DEALLOCATE(tau0, tau_SoA, distances)
END SUBROUTINE make_pointlists

View File

@ -386,10 +386,10 @@ SUBROUTINE memory_report()
maxram = ram + ram_
!
! arrays used for global sorting in ggen:
! mill_g, mill_unsorted, igsrt, g2sort_g, total dimensions:
! igsrt, g2l, g2sort_g, total dimensions:
!
IF ( .NOT. smallmem ) maxram = MAX ( maxram, &
int_size * 7 * ngm_g + real_size * ngm_g )
int_size * 2 * ngm_g + real_size * ngm_g )
!
totram = maxram * nproc_image
IF ( iverbosity > 0 ) THEN