mirror of https://gitlab.com/QEF/q-e.git
Merge branch 'opt-init-rebase' into 'master'
Optimize initialization See merge request QEF/q-e!6
This commit is contained in:
commit
e31aa94fe1
|
@ -56,22 +56,22 @@ CONTAINS
|
|||
!
|
||||
! here a few local variables
|
||||
!
|
||||
REAL(DP) :: t (3), tt
|
||||
INTEGER :: ngm_save, n1, n2, n3, ngm_offset, ngm_max
|
||||
REAL(DP) :: tx(3), ty(3), t(3)
|
||||
REAL(DP), ALLOCATABLE :: tt(:)
|
||||
INTEGER :: ngm_save, n1, n2, n3, ngm_offset, ngm_max, ngm_local
|
||||
!
|
||||
REAL(DP), ALLOCATABLE :: g2sort_g(:)
|
||||
! array containing all g vectors, on all processors: replicated data
|
||||
! when no_global_sort is present (and it is true) only g vectors for
|
||||
! the current processor are stored
|
||||
INTEGER, ALLOCATABLE :: mill_g(:,:), mill_unsorted(:,:)
|
||||
! array containing only g vectors for the current processor
|
||||
INTEGER, ALLOCATABLE :: mill_unsorted(:,:)
|
||||
! array containing all g vectors generators, on all processors
|
||||
! (replicated data). When no_global_sort is present and .true.,
|
||||
! only g-vectors for the current processor are stored
|
||||
INTEGER, ALLOCATABLE :: igsrt(:)
|
||||
INTEGER, ALLOCATABLE :: igsrt(:), g2l(:)
|
||||
!
|
||||
INTEGER :: ni, nj, nk, i, j, k, ipol, ng, igl, indsw
|
||||
INTEGER :: istart, jstart, kstart
|
||||
INTEGER :: mype, npe
|
||||
LOGICAL :: global_sort
|
||||
LOGICAL :: global_sort, is_local
|
||||
INTEGER, ALLOCATABLE :: ngmpe(:)
|
||||
!
|
||||
global_sort = .TRUE.
|
||||
|
@ -90,6 +90,7 @@ CONTAINS
|
|||
ngm_save = ngm
|
||||
!
|
||||
ngm = 0
|
||||
ngm_local = 0
|
||||
!
|
||||
! set the total number of fft mesh points and and initial value of gg
|
||||
! The choice of gcutm is due to the fact that we have to order the
|
||||
|
@ -99,54 +100,98 @@ CONTAINS
|
|||
!
|
||||
! and computes all the g vectors inside a sphere
|
||||
!
|
||||
ALLOCATE( mill_g( 3, ngm_max ),mill_unsorted( 3, ngm_max ) )
|
||||
ALLOCATE( mill_unsorted( 3, ngm_save ) )
|
||||
ALLOCATE( igsrt( ngm_max ) )
|
||||
ALLOCATE( g2l( ngm_max ) )
|
||||
ALLOCATE( g2sort_g( ngm_max ) )
|
||||
!
|
||||
g2sort_g(:) = 1.0d20
|
||||
!
|
||||
! allocate temporal array
|
||||
!
|
||||
ALLOCATE( tt( dfftp%nr3 ) )
|
||||
!
|
||||
! max miller indices (same convention as in module stick_set)
|
||||
!
|
||||
ni = (dfftp%nr1-1)/2
|
||||
nj = (dfftp%nr2-1)/2
|
||||
nk = (dfftp%nr3-1)/2
|
||||
!
|
||||
iloop: DO i = -ni, ni
|
||||
! gamma-only: exclude space with x < 0
|
||||
!
|
||||
IF ( gamma_only ) THEN
|
||||
istart = 0
|
||||
ELSE
|
||||
istart = -ni
|
||||
ENDIF
|
||||
!
|
||||
iloop: DO i = istart, ni
|
||||
!
|
||||
! gamma-only: exclude space with x < 0
|
||||
! gamma-only: exclude plane with x = 0, y < 0
|
||||
!
|
||||
IF ( gamma_only .and. i < 0) CYCLE iloop
|
||||
jloop: DO j = -nj, nj
|
||||
IF ( gamma_only .and. i == 0 ) THEN
|
||||
jstart = 0
|
||||
ELSE
|
||||
jstart = -nj
|
||||
ENDIF
|
||||
!
|
||||
tx(1:3) = i * bg(1:3,1)
|
||||
!
|
||||
jloop: DO j = jstart, nj
|
||||
!
|
||||
! gamma-only: exclude plane with x = 0, y < 0
|
||||
!
|
||||
IF ( gamma_only .and. i == 0 .and. j < 0) CYCLE jloop
|
||||
|
||||
IF( .NOT. global_sort ) THEN
|
||||
IF ( fft_stick_index( dfftp, i, j ) == 0) CYCLE jloop
|
||||
IF ( .NOT. global_sort ) THEN
|
||||
IF ( fft_stick_index( dfftp, i, j ) == 0 ) CYCLE jloop
|
||||
is_local = .TRUE.
|
||||
ELSE
|
||||
IF ( dfftp%lpara .AND. fft_stick_index( dfftp, i, j ) == 0) THEN
|
||||
is_local = .FALSE.
|
||||
ELSE
|
||||
is_local = .TRUE.
|
||||
END IF
|
||||
END IF
|
||||
|
||||
kloop: DO k = -nk, nk
|
||||
!
|
||||
! gamma-only: exclude line with x = 0, y = 0, z < 0
|
||||
!
|
||||
IF ( gamma_only .and. i == 0 .and. j == 0 ) THEN
|
||||
kstart = 0
|
||||
ELSE
|
||||
kstart = -nk
|
||||
ENDIF
|
||||
!
|
||||
ty(1:3) = tx(1:3) + j * bg(1:3,2)
|
||||
!
|
||||
! compute all the norm square
|
||||
!
|
||||
DO k = kstart, nk
|
||||
!
|
||||
! gamma-only: exclude line with x = 0, y = 0, z < 0
|
||||
!
|
||||
IF ( gamma_only .and. i == 0 .and. j == 0 .and. k < 0) CYCLE kloop
|
||||
t(:) = i * bg (:,1) + j * bg (:,2) + k * bg (:,3)
|
||||
tt = t(1)**2+t(2)**2+t(3)**2
|
||||
IF (tt <= gcutm) THEN
|
||||
t(1) = ty(1) + k * bg(1,3)
|
||||
t(2) = ty(2) + k * bg(2,3)
|
||||
t(3) = ty(3) + k * bg(3,3)
|
||||
tt(k-kstart+1) = t(1)**2 + t(2)**2 + t(3)**2
|
||||
ENDDO
|
||||
!
|
||||
! save all the norm square within cutoff
|
||||
!
|
||||
DO k = kstart, nk
|
||||
IF (tt(k-kstart+1) <= gcutm) THEN
|
||||
ngm = ngm + 1
|
||||
IF (ngm > ngm_max) CALL errore ('ggen 1', 'too many g-vectors', ngm)
|
||||
mill_unsorted( :, ngm ) = (/ i,j,k /)
|
||||
IF ( tt > eps8 ) THEN
|
||||
g2sort_g(ngm) = tt
|
||||
IF ( tt(k-kstart+1) > eps8 ) THEN
|
||||
g2sort_g(ngm) = tt(k-kstart+1)
|
||||
ELSE
|
||||
g2sort_g(ngm) = 0.d0
|
||||
ENDIF
|
||||
IF (is_local) THEN
|
||||
ngm_local = ngm_local + 1
|
||||
mill_unsorted( :, ngm_local ) = (/ i,j,k /)
|
||||
g2l(ngm) = ngm_local
|
||||
ELSE
|
||||
g2l(ngm) = 0
|
||||
ENDIF
|
||||
ENDIF
|
||||
ENDDO kloop
|
||||
ENDDO
|
||||
ENDDO jloop
|
||||
ENDDO iloop
|
||||
|
||||
IF (ngm /= ngm_max) &
|
||||
CALL errore ('ggen', 'g-vectors missing !', abs(ngm - ngm_max))
|
||||
!
|
||||
|
@ -156,10 +201,7 @@ CONTAINS
|
|||
ELSE
|
||||
CALL hpsort_eps( ngm_g, g2sort_g, igsrt, eps8 )
|
||||
END IF
|
||||
mill_g(1,:) = mill_unsorted(1,igsrt(:))
|
||||
mill_g(2,:) = mill_unsorted(2,igsrt(:))
|
||||
mill_g(3,:) = mill_unsorted(3,igsrt(:))
|
||||
DEALLOCATE( g2sort_g, igsrt, mill_unsorted )
|
||||
DEALLOCATE( g2sort_g, tt )
|
||||
|
||||
IF( .NOT. global_sort ) THEN
|
||||
!
|
||||
|
@ -183,33 +225,31 @@ CONTAINS
|
|||
ngm = 0
|
||||
!
|
||||
ngloop: DO ng = 1, ngm_max
|
||||
|
||||
i = mill_g(1, ng)
|
||||
j = mill_g(2, ng)
|
||||
k = mill_g(3, ng)
|
||||
|
||||
IF( dfftp%lpara .AND. global_sort ) THEN
|
||||
IF ( fft_stick_index( dfftp, i, j ) == 0) CYCLE ngloop
|
||||
END IF
|
||||
|
||||
ngm = ngm + 1
|
||||
|
||||
! Here map local and global g index !!! N.B: :
|
||||
! the global G vectors arrangement depends on the number of processors
|
||||
!
|
||||
IF( .NOT. global_sort ) THEN
|
||||
ig_l2g( ngm ) = ng + ngm_offset
|
||||
ELSE
|
||||
ig_l2g( ngm ) = ng
|
||||
END IF
|
||||
|
||||
g (1:3, ngm) = i * bg (:, 1) + j * bg (:, 2) + k * bg (:, 3)
|
||||
gg (ngm) = sum(g (1:3, ngm)**2)
|
||||
IF (g2l(igsrt(ng))>0) THEN
|
||||
! fetch the indices
|
||||
i = mill_unsorted(1, g2l(igsrt(ng)))
|
||||
j = mill_unsorted(2, g2l(igsrt(ng)))
|
||||
k = mill_unsorted(3, g2l(igsrt(ng)))
|
||||
!
|
||||
ngm = ngm + 1
|
||||
!
|
||||
! Here map local and global g index !!! N.B: :
|
||||
! the global G vectors arrangement depends on the number of processors
|
||||
!
|
||||
IF( .NOT. global_sort ) THEN
|
||||
ig_l2g( ngm ) = ng + ngm_offset
|
||||
ELSE
|
||||
ig_l2g( ngm ) = ng
|
||||
END IF
|
||||
|
||||
g(1:3, ngm) = i * bg (:, 1) + j * bg (:, 2) + k * bg (:, 3)
|
||||
gg(ngm) = sum(g(1:3, ngm)**2)
|
||||
ENDIF
|
||||
ENDDO ngloop
|
||||
|
||||
DEALLOCATE( mill_g )
|
||||
|
||||
|
||||
DEALLOCATE( igsrt, g2l )
|
||||
|
||||
IF (ngm /= ngm_save) &
|
||||
CALL errore ('ggen', 'g-vectors (ngm) missing !', abs(ngm - ngm_save))
|
||||
!
|
||||
|
|
|
@ -7,6 +7,64 @@
|
|||
!
|
||||
!
|
||||
!--------------------------------------------------------------------------
|
||||
SUBROUTINE compute_distances_SoA(pos, N, RSoA, Distances)
|
||||
!--------------------------------------------------------------------------
|
||||
!
|
||||
! This routine computes the distance between pos and all the centers in RSoA
|
||||
! All the positions are in crystal coordinates
|
||||
!
|
||||
USE kinds, ONLY : dp
|
||||
USE cell_base, ONLY : at
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
REAL(DP),INTENT(in) :: pos(3) ! reference position
|
||||
INTEGER,INTENT(in) :: N ! number of elements in RSoA
|
||||
REAL(DP),INTENT(in) :: RSoA(N,3) ! positions in SoA layout
|
||||
REAL(DP),INTENT(out) :: Distances(N) ! minimal distances
|
||||
!
|
||||
REAL(DP) :: corners(3,8)
|
||||
REAL(DP) :: dx, dy, dz, dx_c, dy_c, dz_c
|
||||
REAL(DP) :: dist, dist_min
|
||||
INTEGER :: ix, iy, iz, ic, iat
|
||||
|
||||
ic = 0
|
||||
DO ix = 0,1
|
||||
dx = DBLE(-ix)
|
||||
DO iy = 0,1
|
||||
dy = DBLE(-iy)
|
||||
DO iz = 0,1
|
||||
dz = DBLE(-iz)
|
||||
ic = ic + 1
|
||||
corners(1,ic) = dx*at(1,1) + dy*at(1,2) + dz*at(1,3)
|
||||
corners(2,ic) = dx*at(2,1) + dy*at(2,2) + dz*at(2,3)
|
||||
corners(3,ic) = dx*at(3,1) + dy*at(3,2) + dz*at(3,3)
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
|
||||
DO iat = 1,N
|
||||
dx = RSoA(iat,1) - pos(1)
|
||||
dx = dx - FLOOR(dx)
|
||||
dy = RSoA(iat,2) - pos(2)
|
||||
dy = dy - FLOOR(dy)
|
||||
dz = RSoA(iat,3) - pos(3)
|
||||
dz = dz - FLOOR(dz)
|
||||
dx_c = dx*at(1,1) + dy*at(1,2) + dz*at(1,3)
|
||||
dy_c = dx*at(2,1) + dy*at(2,2) + dz*at(2,3)
|
||||
dz_c = dx*at(3,1) + dy*at(3,2) + dz*at(3,3)
|
||||
dist_min = dx_c*dx_c + dy_c*dy_c + dz_c*dz_c;
|
||||
DO ic = 2,8
|
||||
dx = dx_c + corners(1,ic)
|
||||
dy = dy_c + corners(2,ic)
|
||||
dz = dz_c + corners(3,ic)
|
||||
dist = dx*dx + dy*dy + dz*dz
|
||||
IF (dist<dist_min) dist_min = dist
|
||||
ENDDO
|
||||
Distances(iat) = SQRT(dist_min)
|
||||
ENDDO
|
||||
END SUBROUTINE compute_distances_SoA
|
||||
|
||||
SUBROUTINE make_pointlists
|
||||
!--------------------------------------------------------------------------
|
||||
!
|
||||
|
@ -35,11 +93,11 @@ SUBROUTINE make_pointlists
|
|||
INTEGER idx,indproc,iat,ir,iat1
|
||||
INTEGER i,j,k,i0,j0,k0,jj0,kk0,ipol,nt,nt1
|
||||
|
||||
REAL(DP) :: posi(3), distance
|
||||
REAL(DP), ALLOCATABLE :: tau0(:,:), distmin(:)
|
||||
REAL(DP) :: posi(3), WS_radius, dist
|
||||
REAL(DP), ALLOCATABLE :: tau0(:,:), tau_SoA(:,:), distmin(:), distances(:)
|
||||
|
||||
WRITE( stdout,'(5x,"Generating pointlists ...")')
|
||||
ALLOCATE(tau0(3,nat))
|
||||
ALLOCATE(tau0(3,nat), tau_SoA(nat,3), distances(nat))
|
||||
ALLOCATE( distmin(ntyp) )
|
||||
|
||||
! Bring all the atomic positions on the first unit cell
|
||||
|
@ -48,43 +106,35 @@ SUBROUTINE make_pointlists
|
|||
CALL cryst_to_cart(nat,tau0,bg,-1)
|
||||
DO iat=1,nat
|
||||
DO ipol=1,3
|
||||
tau0(ipol,iat)=tau0(ipol,iat)-NINT(tau0(ipol,iat))
|
||||
tau_SoA(iat,ipol)=tau0(ipol,iat)
|
||||
ENDDO
|
||||
ENDDO
|
||||
CALL cryst_to_cart(nat,tau0,at,1)
|
||||
|
||||
! Check the minimum distance between two atoms in the system
|
||||
WS_radius = 1.d100
|
||||
DO i = -1,1
|
||||
DO j = -1,1
|
||||
DO k = -1,1
|
||||
IF ( i==0 .AND. j==0 .AND. k==0 ) CYCLE
|
||||
dist = ( DBLE(i)*at(1,1) + DBLE(j)*at(1,2) + DBLE(k)*at(1,3) )**2 &
|
||||
+ ( DBLE(i)*at(2,1) + DBLE(j)*at(2,2) + DBLE(k)*at(2,3) )**2 &
|
||||
+ ( DBLE(i)*at(3,1) + DBLE(j)*at(3,2) + DBLE(k)*at(3,3) )**2
|
||||
IF (dist<WS_radius) WS_radius = dist
|
||||
ENDDO
|
||||
ENDDO
|
||||
ENDDO
|
||||
WS_radius = SQRT(WS_radius)
|
||||
|
||||
distmin(:) = 1.d0
|
||||
distmin(:) = WS_radius
|
||||
|
||||
DO iat = 1,nat
|
||||
nt = ityp(iat)
|
||||
call compute_distances_SoA(tau0(1:3,iat), nat, tau_SoA, distances)
|
||||
DO iat1 = 1,nat
|
||||
IF (iat.eq.iat1) CYCLE
|
||||
nt1 = ityp(iat1)
|
||||
|
||||
! posi is the position of a second atom
|
||||
DO i = -1,1
|
||||
DO j = -1,1
|
||||
DO k = -1,1
|
||||
|
||||
distance = 0.d0
|
||||
DO ipol = 1,3
|
||||
posi(ipol) = tau0(ipol,iat1) + DBLE(i)*at(ipol,1) &
|
||||
+ DBLE(j)*at(ipol,2) &
|
||||
+ DBLE(k)*at(ipol,3)
|
||||
distance = distance + (posi(ipol)-tau0(ipol,iat))**2
|
||||
ENDDO
|
||||
|
||||
distance = SQRT(distance)
|
||||
IF ((distance.LT.distmin(nt)).AND.(distance.GT.1.d-8)) &
|
||||
& distmin(nt) = distance
|
||||
IF ((distance.LT.distmin(nt1)).AND.(distance.GT.1.d-8)) &
|
||||
& distmin(nt1) = distance
|
||||
|
||||
ENDDO ! k
|
||||
ENDDO ! j
|
||||
ENDDO ! i
|
||||
|
||||
IF (distances(iat1).LT.distmin(nt)) distmin(nt) = distances(iat1)
|
||||
IF (distances(iat1).LT.distmin(nt1)) distmin(nt1) = distances(iat1)
|
||||
ENDDO ! iat1
|
||||
ENDDO ! iat
|
||||
|
||||
|
@ -111,6 +161,9 @@ SUBROUTINE make_pointlists
|
|||
factlist(:) = 0.d0
|
||||
jj0 = dfftp%my_i0r2p ; kk0 = dfftp%my_i0r3p
|
||||
DO ir = 1, dfftp%nr1x*dfftp%my_nr2p*dfftp%my_nr3p
|
||||
! ... check result vector boundary
|
||||
IF( ir .GT. SIZE( factlist ) .OR. ir .GT. SIZE( pointlist )) &
|
||||
CALL errore( ' make_pointlists ', ' inconsistent sizes ', 1 )
|
||||
!
|
||||
! ... three dimensional indexes
|
||||
!
|
||||
|
@ -124,45 +177,27 @@ SUBROUTINE make_pointlists
|
|||
i0 = idx
|
||||
|
||||
! ... do not include points outside the physical range
|
||||
|
||||
IF ( i0 >= dfftp%nr1 .OR. j0 >= dfftp%nr2 .OR. k0 >= dfftp%nr3 ) CYCLE
|
||||
|
||||
DO i = i0-dfftp%nr1,i0+dfftp%nr1, dfftp%nr1
|
||||
DO j = j0-dfftp%nr2, j0+dfftp%nr2, dfftp%nr2
|
||||
DO k = k0-dfftp%nr3, k0+dfftp%nr3, dfftp%nr3
|
||||
DO ipol=1,3
|
||||
posi(ipol) = DBLE(i)/DBLE(dfftp%nr1) * at(ipol,1) &
|
||||
+ DBLE(j)/DBLE(dfftp%nr2) * at(ipol,2) &
|
||||
+ DBLE(k)/DBLE(dfftp%nr3) * at(ipol,3)
|
||||
ENDDO
|
||||
posi(1) = DBLE(i0)/DBLE(dfftp%nr1)
|
||||
posi(2) = DBLE(j0)/DBLE(dfftp%nr2)
|
||||
posi(3) = DBLE(k0)/DBLE(dfftp%nr3)
|
||||
call compute_distances_SoA(posi, nat, tau_SoA, distances)
|
||||
|
||||
DO iat = 1,nat
|
||||
nt=ityp(iat)
|
||||
distance = SQRT( (posi(1)-tau0(1,iat))**2 + &
|
||||
(posi(2)-tau0(2,iat))**2 + &
|
||||
(posi(3)-tau0(3,iat))**2)
|
||||
|
||||
IF (distance.LE.r_m(nt)) THEN
|
||||
IF( ir .GT. SIZE( factlist ) .OR. ir .GT. SIZE( pointlist )) &
|
||||
CALL errore( ' make_pointlists ', ' inconsistent sizes ', 1 )
|
||||
factlist(ir) = 1.d0
|
||||
pointlist(ir) = iat
|
||||
GO TO 10
|
||||
ELSE IF (distance.LE.1.2*r_m(nt)) THEN
|
||||
IF( ir .GT. SIZE( factlist ) .OR. ir .GT. SIZE( pointlist )) &
|
||||
CALL errore( ' make_pointlists ', ' inconsistent sizes ', 1 )
|
||||
factlist(ir) = 1.d0 - (distance -r_m(nt))/(0.2d0*r_m(nt))
|
||||
pointlist(ir) = iat
|
||||
GO TO 10
|
||||
ENDIF
|
||||
ENDDO
|
||||
|
||||
ENDDO ! k
|
||||
ENDDO ! j
|
||||
ENDDO ! i
|
||||
10 CONTINUE
|
||||
DO iat = 1,nat
|
||||
nt=ityp(iat)
|
||||
IF (distances(iat).LE.r_m(nt)) THEN
|
||||
factlist(ir) = 1.d0
|
||||
pointlist(ir) = iat
|
||||
EXIT
|
||||
ELSE IF (distances(iat).LE.1.2*r_m(nt)) THEN
|
||||
factlist(ir) = 1.d0 - (distances(iat) -r_m(nt))/(0.2d0*r_m(nt))
|
||||
pointlist(ir) = iat
|
||||
EXIT
|
||||
ENDIF
|
||||
ENDDO
|
||||
ENDDO ! ir
|
||||
DEALLOCATE(tau0)
|
||||
DEALLOCATE(tau0, tau_SoA, distances)
|
||||
|
||||
END SUBROUTINE make_pointlists
|
||||
|
||||
|
|
|
@ -386,10 +386,10 @@ SUBROUTINE memory_report()
|
|||
maxram = ram + ram_
|
||||
!
|
||||
! arrays used for global sorting in ggen:
|
||||
! mill_g, mill_unsorted, igsrt, g2sort_g, total dimensions:
|
||||
! igsrt, g2l, g2sort_g, total dimensions:
|
||||
!
|
||||
IF ( .NOT. smallmem ) maxram = MAX ( maxram, &
|
||||
int_size * 7 * ngm_g + real_size * ngm_g )
|
||||
int_size * 2 * ngm_g + real_size * ngm_g )
|
||||
!
|
||||
totram = maxram * nproc_image
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
|
|
Loading…
Reference in New Issue