mirror of https://gitlab.com/QEF/q-e.git
Merge commit '3c87bac5e67b5e30b9c5d7e7d3a69f9fb4285e1b' into gpu-develop
This commit is contained in:
commit
56c3090769
|
@ -170,11 +170,6 @@ MODULE fft_types
|
|||
INTEGER, ALLOCATABLE :: srh(:,:) ! Isend/recv handles by subbatch
|
||||
#endif
|
||||
COMPLEX(DP), ALLOCATABLE, DIMENSION(:) :: aux
|
||||
#if defined(_OPENMP)
|
||||
INTEGER, ALLOCATABLE :: comm2s(:) ! multiple communicator for the fft group along the second direction
|
||||
INTEGER, ALLOCATABLE :: comm3s(:) ! multiple communicator for the fft group along the third direction
|
||||
#endif
|
||||
|
||||
END TYPE
|
||||
|
||||
REAL(DP) :: fft_dual = 4.0d0
|
||||
|
@ -245,14 +240,6 @@ CONTAINS
|
|||
CALL MPI_COMM_SPLIT( comm, color, key, desc%comm3, ierr )
|
||||
CALL MPI_COMM_RANK( desc%comm3, desc%mype3, ierr )
|
||||
CALL MPI_COMM_SIZE( desc%comm3, desc%nproc3, ierr )
|
||||
#if defined(_OPENMP)
|
||||
ALLOCATE( desc%comm2s( OMP_GET_MAX_THREADS() ))
|
||||
ALLOCATE( desc%comm3s( OMP_GET_MAX_THREADS() ))
|
||||
DO i=1, OMP_GET_MAX_THREADS()
|
||||
CALL MPI_COMM_DUP(desc%comm2, desc%comm2s(i), ierr)
|
||||
CALL MPI_COMM_DUP(desc%comm3, desc%comm3s(i), ierr)
|
||||
ENDDO
|
||||
#endif
|
||||
#else
|
||||
desc%comm2 = desc%comm ; desc%mype2 = desc%mype ; desc%nproc2 = desc%nproc
|
||||
desc%comm3 = desc%comm ; desc%mype3 = desc%mype ; desc%nproc3 = desc%nproc
|
||||
|
@ -446,14 +433,6 @@ CONTAINS
|
|||
#if defined(__MPI)
|
||||
IF (desc%comm2 /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2, ierr )
|
||||
IF (desc%comm3 /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3, ierr )
|
||||
#if defined(_OPENMP)
|
||||
DO i=1, SIZE(desc%comm2s)
|
||||
IF (desc%comm2s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2s(i), ierr )
|
||||
IF (desc%comm3s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3s(i), ierr )
|
||||
ENDDO
|
||||
DEALLOCATE( desc%comm2s )
|
||||
DEALLOCATE( desc%comm3s )
|
||||
#endif
|
||||
#else
|
||||
desc%comm2 = MPI_COMM_NULL
|
||||
desc%comm3 = MPI_COMM_NULL
|
||||
|
@ -493,24 +472,6 @@ CONTAINS
|
|||
INTEGER :: ierr
|
||||
!write (6,*) ' inside fft_type_set' ; FLUSH(6)
|
||||
!
|
||||
#if defined(__MPI)
|
||||
#if defined(_OPENMP)
|
||||
IF (nmany > OMP_GET_MAX_THREADS()) THEN
|
||||
DO i=1, SIZE(desc%comm2s)
|
||||
IF (desc%comm2s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2s(i), ierr )
|
||||
IF (desc%comm3s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3s(i), ierr )
|
||||
ENDDO
|
||||
DEALLOCATE( desc%comm2s )
|
||||
DEALLOCATE( desc%comm3s )
|
||||
ALLOCATE( desc%comm2s( nmany ))
|
||||
ALLOCATE( desc%comm3s( nmany ))
|
||||
DO i=1, nmany
|
||||
CALL MPI_COMM_DUP(desc%comm2, desc%comm2s(i), ierr)
|
||||
CALL MPI_COMM_DUP(desc%comm3, desc%comm3s(i), ierr)
|
||||
ENDDO
|
||||
ENDIF
|
||||
#endif
|
||||
#endif
|
||||
!
|
||||
IF (.NOT. ALLOCATED( desc%nsp ) ) &
|
||||
CALL fftx_error__(' fft_type_set ', ' fft arrays not yet allocated ', 1 )
|
||||
|
|
|
@ -61,7 +61,7 @@ SUBROUTINE hp_postproc
|
|||
eps3 = 1.d-4 ! the same threshold for the comparison of distances
|
||||
! as in PW/src/inter_V.f90 DFT+U+V
|
||||
!
|
||||
CHARACTER(len=50) :: filenameU
|
||||
CHARACTER(len=256) :: filenameU
|
||||
INTEGER, EXTERNAL :: find_free_unit
|
||||
!
|
||||
CALL start_clock('hp_calc_U')
|
||||
|
|
|
@ -13,7 +13,7 @@ SUBROUTINE elphon()
|
|||
! Electron-phonon calculation from data saved in fildvscf
|
||||
!
|
||||
USE kinds, ONLY : DP
|
||||
USE constants, ONLY : amu_ry
|
||||
USE constants, ONLY : amu_ry, RY_TO_THZ, RY_TO_CMM1
|
||||
USE cell_base, ONLY : celldm, omega, ibrav, at, bg
|
||||
USE ions_base, ONLY : nat, ntyp => nsp, ityp, tau, amass
|
||||
USE gvecs, ONLY: doublegrid
|
||||
|
@ -50,7 +50,7 @@ SUBROUTINE elphon()
|
|||
COMPLEX(DP), allocatable :: phip (:, :, :, :)
|
||||
|
||||
INTEGER :: ntyp_, nat_, ibrav_, nspin_mag_, mu, nu, na, nb, nta, ntb, nqs_
|
||||
REAL(DP) :: celldm_(6)
|
||||
REAL(DP) :: celldm_(6), w1
|
||||
CHARACTER(LEN=3) :: atm(ntyp)
|
||||
|
||||
CALL start_clock ('elphon')
|
||||
|
@ -165,9 +165,27 @@ SUBROUTINE elphon()
|
|||
|
||||
deallocate( phip )
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
! Write phonon frequency to stdout
|
||||
!
|
||||
WRITE( stdout, 8000) (xq (i), i = 1, 3)
|
||||
!
|
||||
DO nu = 1, 3 * nat
|
||||
w1 = SQRT( ABS( w2(nu) ) )
|
||||
if (w2(nu) < 0.d0) w1 = - w1
|
||||
WRITE( stdout, 8010) nu, w1 * RY_TO_THZ, w1 * RY_TO_CMM1
|
||||
ENDDO
|
||||
!
|
||||
WRITE( stdout, '(1x,74("*"))')
|
||||
!
|
||||
ENDIF ! .NOT. trans
|
||||
!
|
||||
CALL stop_clock ('elphon')
|
||||
!
|
||||
8000 FORMAT(/,5x,'Diagonalizing the dynamical matrix', &
|
||||
& //,5x,'q = ( ',3f14.9,' ) ',//,1x,74('*'))
|
||||
8010 FORMAT (5x,'freq (',i5,') =',f15.6,' [THz] =',f15.6,' [cm-1]')
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE elphon
|
||||
!
|
||||
|
|
|
@ -83,16 +83,17 @@ SUBROUTINE openfilq()
|
|||
ELSE
|
||||
! this is the standard treatment
|
||||
IF (lgamma.AND.modenum==0.AND..NOT.newgrid ) tmp_dir=tmp_dir_save
|
||||
IF ((noncolin.AND.domag).OR.lsda) tmp_dir=tmp_dir_phq
|
||||
! FIXME: why this case?
|
||||
IF ( noncolin.AND.domag ) tmp_dir=tmp_dir_phq
|
||||
ENDIF
|
||||
!!!!!!!!!!!!!!!!!!!!!!!! END OF ACFDT TEST !!!!!!!!!!!!!!!!
|
||||
iuwfc = 20
|
||||
lrwfc = nbnd * npwx * npol
|
||||
CALL open_buffer (iuwfc, 'wfc', lrwfc, io_level, exst_mem, exst, tmp_dir)
|
||||
IF (.NOT.exst.AND..NOT.exst_mem.and..not.all_done) THEN
|
||||
tmp_dir = tmp_dir_phq
|
||||
!FIXME Dirty fix for obscure case, likely obsolete?
|
||||
CALL close_buffer(iuwfc, 'delete')
|
||||
!FIXME Dirty fix for obscure case
|
||||
tmp_dir = tmp_dir_phq
|
||||
CALL open_buffer (iuwfc, 'wfc', lrwfc, io_level, exst_mem, exst, tmp_dir)
|
||||
IF (.NOT.exst.AND..NOT.exst_mem) CALL errore ('openfilq', 'file '//trim(prefix)//'.wfc not found', 1)
|
||||
END IF
|
||||
|
|
|
@ -89,8 +89,8 @@ example05:
|
|||
tensor for AlAs.
|
||||
|
||||
example06
|
||||
This example shows how to use ph.x to calculate
|
||||
the phonon frequencies at Gamma and X of fcc-Pt.
|
||||
This example shows how to use ph.x to calculate the phonon frequencies
|
||||
at Gamma and X and the dispersion for fcc-Pt with spin-orbit interactions.
|
||||
|
||||
example07:
|
||||
This example tests pw.x and ph.x in several cases that require the
|
||||
|
@ -120,7 +120,7 @@ example12:
|
|||
modes of a molecule (SiH4) at Gamma.
|
||||
|
||||
example13:
|
||||
Deleted
|
||||
Full dispersions for spin-polarized phonons (Ni)
|
||||
|
||||
example14:
|
||||
This example shows how to use ph.x to calculate the phonon frequencies
|
||||
|
|
|
@ -10,3 +10,5 @@ The calculation proceeds as follows:
|
|||
output=pt.ph.out).
|
||||
|
||||
3) make a phonon calculation at X (input=pt.phX.in, output=pt.phX.out).
|
||||
|
||||
4) make a phonon dispersion calculation (input=pt.ph.in, output=pt.ph.out).
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -157,5 +157,22 @@ $PH_COMMAND < pt.phX.in > pt.phX.out
|
|||
check_failure $?
|
||||
$ECHO " done"
|
||||
|
||||
cat > pt.ph.in << EOF
|
||||
phonon dispersions of Pt
|
||||
&inputph
|
||||
amass(1)=195.078,
|
||||
prefix='platinum',
|
||||
outdir='$TMP_DIR'
|
||||
fildyn='ptdyn',
|
||||
tr2_ph=1.0d-16,
|
||||
ldisp=.true., nq1=4,nq2=4,nq3=4
|
||||
/
|
||||
EOF
|
||||
$ECHO " running the phonon dispersions calculation for Pt with spin-orbit coupling...\c"
|
||||
$PH_COMMAND < pt.ph.in > pt.ph.out
|
||||
check_failure $?
|
||||
$ECHO " done"
|
||||
|
||||
$ECHO
|
||||
$ECHO "$EXAMPLE_DIR: done"
|
||||
|
||||
|
|
|
@ -29,6 +29,6 @@ The calculation proceeds as follows:
|
|||
insulator with noncollinear magnetization.
|
||||
(input=o2_nc.scf.in, output=o2_nc.scf.out)
|
||||
|
||||
8) make a self-consistent calculation for the O2 molecule treated as an
|
||||
insulator with noncollinear magnetization.
|
||||
8) make a phonon calculation at the Gamma point for the O2 molecule
|
||||
treated as an insulator with noncollinear magnetization.
|
||||
(input=o2_nc.phG.in, output=o2_nc.phG.out)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,461 @@
|
|||
|
||||
Program PWSCF v.6.5 starts on 19Mar2020 at 9:46: 9
|
||||
|
||||
This program is part of the open-source Quantum ESPRESSO suite
|
||||
for quantum simulation of materials; please cite
|
||||
"P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009);
|
||||
"P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017);
|
||||
URL http://www.quantum-espresso.org",
|
||||
in publications or presentations arising from this work. More details at
|
||||
http://www.quantum-espresso.org/quote
|
||||
|
||||
Parallel version (MPI & OpenMP), running on 4 processor cores
|
||||
Number of MPI processes: 4
|
||||
Threads/MPI process: 1
|
||||
|
||||
MPI processes distributed on 1 nodes
|
||||
R & G space division: proc/nbgrp/npool/nimage = 4
|
||||
Waiting for input...
|
||||
Reading input from standard input
|
||||
|
||||
Current dimensions of program PWSCF are:
|
||||
Max number of different atomic species (ntypx) = 10
|
||||
Max number of k-points (npk) = 40000
|
||||
Max angular momentum in pseudopotentials (lmaxx) = 3
|
||||
file Ni.pbe-nd-rrkjus.UPF: wavefunction(s) 4S renormalized
|
||||
|
||||
Subspace diagonalization in iterative solution of the eigenvalue problem:
|
||||
a serial algorithm will be used
|
||||
|
||||
|
||||
Parallelization info
|
||||
--------------------
|
||||
sticks: dense smooth PW G-vecs: dense smooth PW
|
||||
Min 112 40 15 1604 351 82
|
||||
Max 113 41 16 1607 354 83
|
||||
Sum 451 163 61 6423 1411 331
|
||||
|
||||
|
||||
|
||||
bravais-lattice index = 2
|
||||
lattice parameter (alat) = 6.6500 a.u.
|
||||
unit-cell volume = 73.5199 (a.u.)^3
|
||||
number of atoms/cell = 1
|
||||
number of atomic types = 1
|
||||
number of electrons = 10.00
|
||||
number of Kohn-Sham states= 9
|
||||
kinetic-energy cutoff = 27.0000 Ry
|
||||
charge density cutoff = 300.0000 Ry
|
||||
convergence threshold = 1.0E-08
|
||||
mixing beta = 0.7000
|
||||
number of iterations used = 8 plain mixing
|
||||
Exchange-correlation= SLA PW PBE PBE
|
||||
( 1 4 3 4 0 0 0)
|
||||
|
||||
celldm(1)= 6.650000 celldm(2)= 0.000000 celldm(3)= 0.000000
|
||||
celldm(4)= 0.000000 celldm(5)= 0.000000 celldm(6)= 0.000000
|
||||
|
||||
crystal axes: (cart. coord. in units of alat)
|
||||
a(1) = ( -0.500000 0.000000 0.500000 )
|
||||
a(2) = ( 0.000000 0.500000 0.500000 )
|
||||
a(3) = ( -0.500000 0.500000 0.000000 )
|
||||
|
||||
reciprocal axes: (cart. coord. in units 2 pi/alat)
|
||||
b(1) = ( -1.000000 -1.000000 1.000000 )
|
||||
b(2) = ( 1.000000 1.000000 1.000000 )
|
||||
b(3) = ( -1.000000 1.000000 -1.000000 )
|
||||
|
||||
|
||||
PseudoPot. # 1 for Ni read from file:
|
||||
/home/giannozz/q-e-mio/pseudo/Ni.pbe-nd-rrkjus.UPF
|
||||
MD5 check sum: d71bc9c4c8adef96ad6fe9664ede368e
|
||||
Pseudo is Ultrasoft + core correction, Zval = 10.0
|
||||
Generated by new atomic code, or converted to UPF format
|
||||
Using radial grid of 1203 points, 6 beta functions with:
|
||||
l(1) = 0
|
||||
l(2) = 0
|
||||
l(3) = 1
|
||||
l(4) = 1
|
||||
l(5) = 2
|
||||
l(6) = 2
|
||||
Q(r) pseudized with 0 coefficients
|
||||
|
||||
|
||||
atomic species valence mass pseudopotential
|
||||
Ni 10.00 58.69340 Ni( 1.00)
|
||||
|
||||
Starting magnetic structure
|
||||
atomic species magnetization
|
||||
Ni 0.500
|
||||
|
||||
48 Sym. Ops., with inversion, found
|
||||
|
||||
|
||||
|
||||
Cartesian axes
|
||||
|
||||
site n. atom positions (alat units)
|
||||
1 Ni tau( 1) = ( 0.0000000 0.0000000 0.0000000 )
|
||||
|
||||
number of k points= 10 Marzari-Vanderbilt smearing, width (Ry)= 0.0200
|
||||
cart. coord. in units 2pi/alat
|
||||
k( 1) = ( -0.1250000 0.1250000 0.1250000), wk = 0.0312500
|
||||
k( 2) = ( -0.3750000 0.3750000 -0.1250000), wk = 0.0937500
|
||||
k( 3) = ( 0.3750000 -0.3750000 0.6250000), wk = 0.0937500
|
||||
k( 4) = ( 0.1250000 -0.1250000 0.3750000), wk = 0.0937500
|
||||
k( 5) = ( -0.1250000 0.6250000 0.1250000), wk = 0.0937500
|
||||
k( 6) = ( 0.6250000 -0.1250000 0.8750000), wk = 0.1875000
|
||||
k( 7) = ( 0.3750000 0.1250000 0.6250000), wk = 0.1875000
|
||||
k( 8) = ( -0.1250000 -0.8750000 0.1250000), wk = 0.0937500
|
||||
k( 9) = ( -0.3750000 0.3750000 0.3750000), wk = 0.0312500
|
||||
k( 10) = ( 0.3750000 -0.3750000 1.1250000), wk = 0.0937500
|
||||
|
||||
Dense grid: 6423 G-vectors FFT dimensions: ( 25, 25, 25)
|
||||
|
||||
Smooth grid: 1411 G-vectors FFT dimensions: ( 15, 15, 15)
|
||||
|
||||
Estimated max dynamical RAM per process > 7.84 MB
|
||||
|
||||
Estimated total dynamical RAM > 31.37 MB
|
||||
Generating pointlists ...
|
||||
new r_m : 0.2917 (alat units) 1.9397 (a.u.) for type 1
|
||||
|
||||
Check: negative core charge= -0.000021
|
||||
|
||||
Initial potential from superposition of free atoms
|
||||
|
||||
starting charge 9.99954, renormalised to 10.00000
|
||||
Starting wfcs are 6 randomized atomic wfcs + 3 random wfcs
|
||||
|
||||
total cpu time spent up to now is 0.4 secs
|
||||
|
||||
Self-consistent Calculation
|
||||
|
||||
iteration # 1 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 1.00E-02, avg # of iterations = 4.2
|
||||
|
||||
total cpu time spent up to now is 0.4 secs
|
||||
|
||||
total energy = -85.61582607 Ry
|
||||
Harris-Foulkes estimate = -85.78369204 Ry
|
||||
estimated scf accuracy < 0.60123762 Ry
|
||||
|
||||
total magnetization = 1.63 Bohr mag/cell
|
||||
absolute magnetization = 1.65 Bohr mag/cell
|
||||
|
||||
iteration # 2 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 6.01E-03, avg # of iterations = 2.0
|
||||
|
||||
total cpu time spent up to now is 0.5 secs
|
||||
|
||||
total energy = -85.74795377 Ry
|
||||
Harris-Foulkes estimate = -86.04555041 Ry
|
||||
estimated scf accuracy < 0.81456890 Ry
|
||||
|
||||
total magnetization = 0.70 Bohr mag/cell
|
||||
absolute magnetization = 0.75 Bohr mag/cell
|
||||
|
||||
iteration # 3 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 6.01E-03, avg # of iterations = 1.1
|
||||
|
||||
total cpu time spent up to now is 0.5 secs
|
||||
|
||||
total energy = -85.88902499 Ry
|
||||
Harris-Foulkes estimate = -85.86964512 Ry
|
||||
estimated scf accuracy < 0.02587232 Ry
|
||||
|
||||
total magnetization = 0.85 Bohr mag/cell
|
||||
absolute magnetization = 1.00 Bohr mag/cell
|
||||
|
||||
iteration # 4 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 2.59E-04, avg # of iterations = 1.6
|
||||
|
||||
total cpu time spent up to now is 0.6 secs
|
||||
|
||||
total energy = -85.89672647 Ry
|
||||
Harris-Foulkes estimate = -85.89651894 Ry
|
||||
estimated scf accuracy < 0.00091973 Ry
|
||||
|
||||
total magnetization = 0.72 Bohr mag/cell
|
||||
absolute magnetization = 0.84 Bohr mag/cell
|
||||
|
||||
iteration # 5 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 9.20E-06, avg # of iterations = 2.0
|
||||
|
||||
total cpu time spent up to now is 0.6 secs
|
||||
|
||||
total energy = -85.89693407 Ry
|
||||
Harris-Foulkes estimate = -85.89693937 Ry
|
||||
estimated scf accuracy < 0.00010875 Ry
|
||||
|
||||
total magnetization = 0.70 Bohr mag/cell
|
||||
absolute magnetization = 0.82 Bohr mag/cell
|
||||
|
||||
iteration # 6 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 1.09E-06, avg # of iterations = 1.6
|
||||
|
||||
total cpu time spent up to now is 0.7 secs
|
||||
|
||||
total energy = -85.89698810 Ry
|
||||
Harris-Foulkes estimate = -85.89696699 Ry
|
||||
estimated scf accuracy < 0.00004828 Ry
|
||||
|
||||
total magnetization = 0.71 Bohr mag/cell
|
||||
absolute magnetization = 0.81 Bohr mag/cell
|
||||
|
||||
iteration # 7 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 4.83E-07, avg # of iterations = 1.0
|
||||
|
||||
total cpu time spent up to now is 0.7 secs
|
||||
|
||||
total energy = -85.89698843 Ry
|
||||
Harris-Foulkes estimate = -85.89698777 Ry
|
||||
estimated scf accuracy < 0.00000129 Ry
|
||||
|
||||
total magnetization = 0.71 Bohr mag/cell
|
||||
absolute magnetization = 0.81 Bohr mag/cell
|
||||
|
||||
iteration # 8 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 1.29E-08, avg # of iterations = 2.1
|
||||
|
||||
total cpu time spent up to now is 0.8 secs
|
||||
|
||||
total energy = -85.89698924 Ry
|
||||
Harris-Foulkes estimate = -85.89698895 Ry
|
||||
estimated scf accuracy < 0.00000064 Ry
|
||||
|
||||
total magnetization = 0.71 Bohr mag/cell
|
||||
absolute magnetization = 0.80 Bohr mag/cell
|
||||
|
||||
iteration # 9 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 6.40E-09, avg # of iterations = 1.2
|
||||
|
||||
total cpu time spent up to now is 0.8 secs
|
||||
|
||||
total energy = -85.89698930 Ry
|
||||
Harris-Foulkes estimate = -85.89698920 Ry
|
||||
estimated scf accuracy < 0.00000020 Ry
|
||||
|
||||
total magnetization = 0.71 Bohr mag/cell
|
||||
absolute magnetization = 0.80 Bohr mag/cell
|
||||
|
||||
iteration # 10 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 2.02E-09, avg # of iterations = 1.0
|
||||
|
||||
total cpu time spent up to now is 0.9 secs
|
||||
|
||||
total energy = -85.89698931 Ry
|
||||
Harris-Foulkes estimate = -85.89698930 Ry
|
||||
estimated scf accuracy < 0.00000002 Ry
|
||||
|
||||
total magnetization = 0.71 Bohr mag/cell
|
||||
absolute magnetization = 0.80 Bohr mag/cell
|
||||
|
||||
iteration # 11 ecut= 27.00 Ry beta= 0.70
|
||||
Davidson diagonalization with overlap
|
||||
ethr = 2.13E-10, avg # of iterations = 1.0
|
||||
|
||||
Magnetic moment per site:
|
||||
atom: 1 charge: 8.7379 magn: 0.7488 constr: 0.0000
|
||||
|
||||
total cpu time spent up to now is 0.9 secs
|
||||
|
||||
End of self-consistent calculation
|
||||
|
||||
------ SPIN UP ------------
|
||||
|
||||
|
||||
k =-0.1250 0.1250 0.1250 ( 172 PWs) bands (ev):
|
||||
|
||||
5.8697 11.5737 11.8317 11.8317 12.8611 12.8611 35.2150 39.1169
|
||||
41.0563
|
||||
|
||||
k =-0.3750 0.3750-0.1250 ( 171 PWs) bands (ev):
|
||||
|
||||
8.5750 11.2499 11.8341 12.1285 12.7519 13.6726 27.1041 32.6457
|
||||
39.6758
|
||||
|
||||
k = 0.3750-0.3750 0.6250 ( 172 PWs) bands (ev):
|
||||
|
||||
9.6622 11.5165 11.9825 12.1969 13.5532 15.4846 20.4981 33.7467
|
||||
36.0278
|
||||
|
||||
k = 0.1250-0.1250 0.3750 ( 169 PWs) bands (ev):
|
||||
|
||||
7.3630 11.1755 12.0271 12.1374 12.6933 13.1367 31.2697 36.2534
|
||||
36.8261
|
||||
|
||||
k =-0.1250 0.6250 0.1250 ( 178 PWs) bands (ev):
|
||||
|
||||
9.3858 10.5801 12.0472 12.7102 13.4795 13.7865 28.1567 31.5072
|
||||
32.3294
|
||||
|
||||
k = 0.6250-0.1250 0.8750 ( 179 PWs) bands (ev):
|
||||
|
||||
10.3858 10.6410 11.6236 12.9145 13.5150 19.0389 22.3264 26.0100
|
||||
28.3109
|
||||
|
||||
k = 0.3750 0.1250 0.6250 ( 174 PWs) bands (ev):
|
||||
|
||||
10.0138 11.0551 11.4267 12.4905 13.2322 15.3089 24.0931 29.7561
|
||||
32.8980
|
||||
|
||||
k =-0.1250-0.8750 0.1250 ( 176 PWs) bands (ev):
|
||||
|
||||
9.7772 10.1642 12.8691 13.3034 13.6221 16.7879 24.9788 26.3753
|
||||
30.0885
|
||||
|
||||
k =-0.3750 0.3750 0.3750 ( 174 PWs) bands (ev):
|
||||
|
||||
9.0448 11.8253 11.8253 12.3362 13.3394 13.3394 23.0015 37.0668
|
||||
39.2789
|
||||
|
||||
k = 0.3750-0.3750 1.1250 ( 176 PWs) bands (ev):
|
||||
|
||||
10.3652 11.0167 11.5576 12.5016 13.2684 17.7549 21.2363 27.2374
|
||||
34.3326
|
||||
|
||||
------ SPIN DOWN ----------
|
||||
|
||||
|
||||
k =-0.1250 0.1250 0.1250 ( 172 PWs) bands (ev):
|
||||
|
||||
5.8235 12.4452 12.7306 12.7306 13.5993 13.5993 35.2386 38.9839
|
||||
41.0911
|
||||
|
||||
k =-0.3750 0.3750-0.1250 ( 171 PWs) bands (ev):
|
||||
|
||||
8.6208 11.9920 12.5953 12.9299 13.5958 14.4987 27.2785 32.7142
|
||||
39.6077
|
||||
|
||||
k = 0.3750-0.3750 0.6250 ( 172 PWs) bands (ev):
|
||||
|
||||
10.1825 12.1397 12.7501 12.7926 14.4701 15.8906 20.9029 33.7520
|
||||
36.0975
|
||||
|
||||
k = 0.1250-0.1250 0.3750 ( 169 PWs) bands (ev):
|
||||
|
||||
7.3328 11.9983 12.8358 13.0200 13.4874 13.9185 31.3755 36.3333
|
||||
36.7643
|
||||
|
||||
k =-0.1250 0.6250 0.1250 ( 178 PWs) bands (ev):
|
||||
|
||||
9.5396 11.3428 12.7065 13.5760 14.3301 14.5163 28.2785 31.5780
|
||||
32.3842
|
||||
|
||||
k = 0.6250-0.1250 0.8750 ( 179 PWs) bands (ev):
|
||||
|
||||
10.8818 11.3220 12.3443 13.6454 14.5133 19.3212 22.5349 26.1705
|
||||
28.4085
|
||||
|
||||
k = 0.3750 0.1250 0.6250 ( 174 PWs) bands (ev):
|
||||
|
||||
10.3493 11.6766 12.1579 13.2575 14.1339 15.9186 24.3093 29.8491
|
||||
32.9693
|
||||
|
||||
k =-0.1250-0.8750 0.1250 ( 176 PWs) bands (ev):
|
||||
|
||||
10.2090 10.8957 13.6527 14.1097 14.5846 17.0385 25.1835 26.4722
|
||||
30.1022
|
||||
|
||||
k =-0.3750 0.3750 0.3750 ( 174 PWs) bands (ev):
|
||||
|
||||
9.3306 12.6014 12.6014 12.6765 14.2264 14.2264 23.2891 36.8996
|
||||
39.3685
|
||||
|
||||
k = 0.3750-0.3750 1.1250 ( 176 PWs) bands (ev):
|
||||
|
||||
10.9698 11.5109 12.2799 13.2468 14.2186 18.1064 21.5401 27.3703
|
||||
34.3960
|
||||
|
||||
the Fermi energy is 14.2874 ev
|
||||
|
||||
! total energy = -85.89698931 Ry
|
||||
Harris-Foulkes estimate = -85.89698931 Ry
|
||||
estimated scf accuracy < 1.3E-09 Ry
|
||||
|
||||
The total energy is the sum of the following terms:
|
||||
|
||||
one-electron contribution = -2.06436173 Ry
|
||||
hartree contribution = 15.23370153 Ry
|
||||
xc contribution = -30.12053187 Ry
|
||||
ewald contribution = -68.94529435 Ry
|
||||
smearing contrib. (-TS) = -0.00050289 Ry
|
||||
|
||||
total magnetization = 0.71 Bohr mag/cell
|
||||
absolute magnetization = 0.80 Bohr mag/cell
|
||||
|
||||
convergence has been achieved in 11 iterations
|
||||
|
||||
Forces acting on atoms (cartesian axes, Ry/au):
|
||||
|
||||
atom 1 type 1 force = 0.00000000 0.00000000 0.00000000
|
||||
|
||||
Total force = 0.000000 Total SCF correction = 0.000000
|
||||
|
||||
Writing output data file /home/giannozz/q-e-mio/tempdir/nickel.save/
|
||||
|
||||
init_run : 0.09s CPU 0.09s WALL ( 1 calls)
|
||||
electrons : 0.48s CPU 0.53s WALL ( 1 calls)
|
||||
forces : 0.01s CPU 0.01s WALL ( 1 calls)
|
||||
|
||||
Called by init_run:
|
||||
wfcinit : 0.01s CPU 0.01s WALL ( 1 calls)
|
||||
potinit : 0.01s CPU 0.01s WALL ( 1 calls)
|
||||
hinit0 : 0.06s CPU 0.06s WALL ( 1 calls)
|
||||
|
||||
Called by electrons:
|
||||
c_bands : 0.27s CPU 0.30s WALL ( 11 calls)
|
||||
sum_band : 0.13s CPU 0.13s WALL ( 11 calls)
|
||||
v_of_rho : 0.05s CPU 0.05s WALL ( 12 calls)
|
||||
newd : 0.03s CPU 0.04s WALL ( 12 calls)
|
||||
mix_rho : 0.01s CPU 0.01s WALL ( 11 calls)
|
||||
|
||||
Called by c_bands:
|
||||
init_us_2 : 0.01s CPU 0.01s WALL ( 480 calls)
|
||||
cegterg : 0.25s CPU 0.28s WALL ( 220 calls)
|
||||
|
||||
Called by sum_band:
|
||||
sum_band:bec : 0.00s CPU 0.00s WALL ( 220 calls)
|
||||
addusdens : 0.08s CPU 0.08s WALL ( 11 calls)
|
||||
|
||||
Called by *egterg:
|
||||
h_psi : 0.18s CPU 0.20s WALL ( 617 calls)
|
||||
s_psi : 0.00s CPU 0.00s WALL ( 617 calls)
|
||||
g_psi : 0.00s CPU 0.00s WALL ( 377 calls)
|
||||
cdiaghg : 0.05s CPU 0.05s WALL ( 597 calls)
|
||||
|
||||
Called by h_psi:
|
||||
h_psi:calbec : 0.01s CPU 0.01s WALL ( 617 calls)
|
||||
vloc_psi : 0.16s CPU 0.18s WALL ( 617 calls)
|
||||
add_vuspsi : 0.00s CPU 0.01s WALL ( 617 calls)
|
||||
|
||||
General routines
|
||||
calbec : 0.01s CPU 0.01s WALL ( 917 calls)
|
||||
fft : 0.03s CPU 0.03s WALL ( 313 calls)
|
||||
ffts : 0.00s CPU 0.00s WALL ( 46 calls)
|
||||
fftw : 0.18s CPU 0.20s WALL ( 11200 calls)
|
||||
interpolate : 0.00s CPU 0.00s WALL ( 24 calls)
|
||||
|
||||
Parallel routines
|
||||
fft_scatt_xy : 0.03s CPU 0.03s WALL ( 11559 calls)
|
||||
fft_scatt_yz : 0.11s CPU 0.12s WALL ( 11559 calls)
|
||||
|
||||
PWSCF : 0.84s CPU 0.93s WALL
|
||||
|
||||
|
||||
This run was terminated on: 9:46:10 19Mar2020
|
||||
|
||||
=------------------------------------------------------------------------------=
|
||||
JOB DONE.
|
||||
=------------------------------------------------------------------------------=
|
|
@ -0,0 +1,139 @@
|
|||
#!/bin/sh
|
||||
|
||||
# run from directory where this script is
|
||||
cd `echo $0 | sed 's/\(.*\)\/.*/\1/'` # extract pathname
|
||||
EXAMPLE_DIR=`pwd`
|
||||
|
||||
# check whether ECHO has the -e option
|
||||
if test "`echo -e`" = "-e" ; then ECHO=echo ; else ECHO="echo -e" ; fi
|
||||
|
||||
$ECHO
|
||||
$ECHO "$EXAMPLE_DIR : starting"
|
||||
$ECHO
|
||||
$ECHO "This example shows how to use pw.x and ph.x to calculate phonon"
|
||||
$ECHO "dispersions for spin-polarized fcc-Ni."
|
||||
|
||||
# set the needed environment variables
|
||||
. ../../../environment_variables
|
||||
# required executables and pseudopotentials
|
||||
BIN_LIST="pw.x ph.x"
|
||||
PSEUDO_LIST="Ni.pbe-nd-rrkjus.UPF"
|
||||
|
||||
$ECHO
|
||||
$ECHO " executables directory: $BIN_DIR"
|
||||
$ECHO " pseudo directory: $PSEUDO_DIR"
|
||||
$ECHO " temporary directory: $TMP_DIR"
|
||||
$ECHO
|
||||
$ECHO " checking that needed directories and files exist...\c"
|
||||
|
||||
# check for directories
|
||||
for DIR in "$BIN_DIR" "$PSEUDO_DIR" ; do
|
||||
if test ! -d $DIR ; then
|
||||
$ECHO
|
||||
$ECHO "ERROR: $DIR not existent or not a directory"
|
||||
$ECHO "Aborting"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
for DIR in "$TMP_DIR" "$EXAMPLE_DIR/results" ; do
|
||||
if test ! -d $DIR ; then
|
||||
mkdir $DIR
|
||||
fi
|
||||
done
|
||||
cd $EXAMPLE_DIR/results
|
||||
|
||||
# check for executables
|
||||
for FILE in $BIN_LIST ; do
|
||||
if test ! -x $BIN_DIR/$FILE ; then
|
||||
$ECHO
|
||||
$ECHO "ERROR: $BIN_DIR/$FILE not existent or not executable"
|
||||
$ECHO "Aborting"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# check for pseudopotentials
|
||||
for FILE in $PSEUDO_LIST ; do
|
||||
if test ! -r $PSEUDO_DIR/$FILE ; then
|
||||
$ECHO
|
||||
$ECHO "Downloading $FILE to $PSEUDO_DIR...\c"
|
||||
$WGET $PSEUDO_DIR/$FILE $NETWORK_PSEUDO/$FILE 2> /dev/null
|
||||
fi
|
||||
if test $? != 0; then
|
||||
$ECHO
|
||||
$ECHO "ERROR: $PSEUDO_DIR/$FILE not existent or not readable"
|
||||
$ECHO "Aborting"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
$ECHO " done"
|
||||
|
||||
# how to run executables
|
||||
PW_COMMAND="$PARA_PREFIX $BIN_DIR/pw.x $PARA_POSTFIX"
|
||||
PH_COMMAND="$PARA_PREFIX $BIN_DIR/ph.x $PARA_POSTFIX"
|
||||
$ECHO
|
||||
$ECHO " running pw.x as: $PW_COMMAND"
|
||||
$ECHO " running ph.x as: $PH_COMMAND"
|
||||
$ECHO
|
||||
|
||||
# clean TMP_DIR
|
||||
$ECHO " cleaning $TMP_DIR...\c"
|
||||
rm -rf $TMP_DIR/nickel*
|
||||
rm -rf $TMP_DIR/_ph0/nickel*
|
||||
$ECHO " done"
|
||||
|
||||
# self-consistent calculation for Ni with US-PP
|
||||
cat > ni.scf.in << EOF
|
||||
&control
|
||||
calculation='scf'
|
||||
restart_mode='from_scratch',
|
||||
tprnfor = .true.
|
||||
prefix='nickel',
|
||||
pseudo_dir = '$PSEUDO_DIR/',
|
||||
outdir='$TMP_DIR/'
|
||||
/
|
||||
&system
|
||||
ibrav=2, celldm(1) =6.65, nat= 1, ntyp= 1,
|
||||
nspin=2,
|
||||
starting_magnetization(1)=0.5,
|
||||
degauss=0.02,
|
||||
smearing='mv',
|
||||
occupations='smearing',
|
||||
ecutwfc =27.0
|
||||
ecutrho =300.0
|
||||
/
|
||||
&electrons
|
||||
conv_thr = 1.0d-8
|
||||
mixing_beta = 0.7
|
||||
/
|
||||
ATOMIC_SPECIES
|
||||
Ni 58.6934 Ni.pbe-nd-rrkjus.UPF
|
||||
ATOMIC_POSITIONS (alat)
|
||||
Ni 0.00 0.00 0.00
|
||||
K_POINTS AUTOMATIC
|
||||
4 4 4 1 1 1
|
||||
EOF
|
||||
$ECHO " running the scf calculation for Ni...\c"
|
||||
$PW_COMMAND < ni.scf.in > ni.scf.out
|
||||
check_failure $?
|
||||
$ECHO " done"
|
||||
|
||||
# phonon dispersion calculation
|
||||
cat > ni.ph.in << EOF
|
||||
phonons of Ni
|
||||
&inputph
|
||||
tr2_ph=1.0d-14,
|
||||
prefix='nickel',
|
||||
amass(1)=58.6934,
|
||||
fildyn='niX.dyn',
|
||||
outdir='$TMP_DIR/',
|
||||
ldisp=.true., nq1=4,nq2=4,nq3=4
|
||||
/
|
||||
EOF
|
||||
$ECHO " running the phonon dispersion calculation for Ni...\c"
|
||||
$PH_COMMAND < ni.ph.in > ni.ph.out
|
||||
check_failure $?
|
||||
$ECHO " done"
|
||||
|
||||
$ECHO
|
||||
$ECHO "$EXAMPLE_DIR: done"
|
|
@ -79,39 +79,96 @@ SUBROUTINE print_clock_pw()
|
|||
WRITE( stdout, '(/5x,"Called by c_bands:")' )
|
||||
CALL print_clock( 'init_us_2' )
|
||||
IF ( isolve == 0 ) THEN
|
||||
IF ( gamma_only ) THEN
|
||||
CALL print_clock( 'regterg' )
|
||||
ELSE
|
||||
CALL print_clock( 'cegterg' )
|
||||
ENDIF
|
||||
CALL print_clock( 'regterg' ) ; CALL print_clock( 'cegterg' )
|
||||
ELSE IF (isolve == 1) THEN
|
||||
IF ( gamma_only ) THEN
|
||||
CALL print_clock( 'rcgdiagg' )
|
||||
ELSE
|
||||
CALL print_clock( 'ccgdiagg' )
|
||||
ENDIF
|
||||
CALL print_clock( 'rcgdiagg' ) ; CALL print_clock( 'ccgdiagg' )
|
||||
CALL print_clock( 'wfcrot' )
|
||||
ELSE IF (isolve == 2) THEN
|
||||
IF ( gamma_only ) THEN
|
||||
CALL print_clock( 'ppcg_gamma' )
|
||||
ELSE
|
||||
CALL print_clock( 'ppcg_k' )
|
||||
ENDIF
|
||||
CALL print_clock( 'ppcg_gamma' ) ; CALL print_clock( 'ppcg_k' )
|
||||
CALL print_clock( 'wfcrot' )
|
||||
ELSE IF (isolve == 3) THEN
|
||||
CALL print_clock( 'paro_gamma' ) ; CALL print_clock( 'paro_k' )
|
||||
ENDIF
|
||||
!
|
||||
!IF ( iverbosity > 0) THEN
|
||||
WRITE( stdout, '(/5x,"Called by sum_band:")' )
|
||||
CALL print_clock( 'sum_band:weights' )
|
||||
CALL print_clock( 'sum_band:loop' )
|
||||
CALL print_clock( 'sum_band:buffer' )
|
||||
CALL print_clock( 'sum_band:init_us_2' )
|
||||
CALL print_clock( 'sum_band:calbec' )
|
||||
CALL print_clock( 'sum_band:becsum' )
|
||||
CALL print_clock( 'addusdens' )
|
||||
!ENDIF
|
||||
!
|
||||
IF ( isolve == 0 ) THEN
|
||||
WRITE( stdout, '(/5x,"Called by *egterg:")' )
|
||||
IF ( gamma_only ) THEN
|
||||
CALL print_clock( 'rdiaghg' )
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'regterg:overlap' )
|
||||
CALL print_clock( 'regterg:update' )
|
||||
CALL print_clock( 'regterg:last' )
|
||||
CALL print_clock( 'rdiaghg:choldc' )
|
||||
CALL print_clock( 'rdiaghg:inversion' )
|
||||
CALL print_clock( 'rdiaghg:paragemm' )
|
||||
ENDIF
|
||||
ELSE
|
||||
CALL print_clock( 'cdiaghg' )
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'cegterg:overlap' )
|
||||
CALL print_clock( 'cegterg:update' )
|
||||
CALL print_clock( 'cegterg:last' )
|
||||
CALL print_clock( 'cdiaghg:choldc' )
|
||||
CALL print_clock( 'cdiaghg:inversion' )
|
||||
CALL print_clock( 'cdiaghg:paragemm' )
|
||||
END IF
|
||||
END IF
|
||||
ELSE IF ( isolve == 1 ) THEN
|
||||
WRITE( stdout, '(/5x,"Called by *cgdiagg:")' )
|
||||
ELSE IF ( isolve == 2 ) THEN
|
||||
WRITE( stdout, '(/5x,"Called by ppcg_*:")' )
|
||||
! IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'ppcg:zgemm' ) ; CALL print_clock( 'ppcg:dgemm' )
|
||||
CALL print_clock( 'ppcg:hpsi' )
|
||||
CALL print_clock( 'ppcg:cholQR' )
|
||||
CALL print_clock( 'ppcg:RR' )
|
||||
CALL print_clock( 'ppcg:ZTRSM' ) ; CALL print_clock( 'ppcg:DTRSM' )
|
||||
CALL print_clock( 'ppcg:lock' )
|
||||
! END IF
|
||||
ELSE IF ( isolve == 3 ) THEN
|
||||
WRITE( stdout, '(/5x,"Called by paro_*:")' )
|
||||
! IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'paro:init' )
|
||||
CALL print_clock( 'paro:pack' )
|
||||
CALL print_clock( 'paro:zero' )
|
||||
CALL print_clock( 'paro:mp_bar' )
|
||||
CALL print_clock( 'paro:mp_sum' )
|
||||
CALL print_clock( 'pcg' )
|
||||
CALL print_clock( 'pcg:hs_1psi' )
|
||||
CALL print_clock( 'pcg:ortho' )
|
||||
CALL print_clock( 'pcg:move' )
|
||||
|
||||
CALL print_clock( 'rotHSw' )
|
||||
CALL print_clock( 'rotHSw:move' )
|
||||
CALL print_clock( 'rotHSw:hc' )
|
||||
CALL print_clock( 'rotHSw:diag' )
|
||||
CALL print_clock( 'rotHSw:evc' )
|
||||
CALL print_clock( 'rotHSw:hc:b0' ) ;
|
||||
CALL print_clock( 'rotHSw:hc:s1' ) ; call print_clock('rotHSw:hc:comp')
|
||||
CALL print_clock( 'rotHSw:hc:b1' ) ;
|
||||
CALL print_clock( 'rotHSw:hc:s2' ) ;
|
||||
CALL print_clock( 'rotHSw:hc:s3' ) ; call print_clock('rotHSw:hc:rs')
|
||||
CALL print_clock( 'rotHSw:hc:b2' ) ; call print_clock('rotHSw:hc:sy')
|
||||
CALL print_clock( 'rotHSw:hc:s4' ) ; CALL print_clock('rotHSw:hc:b3' )
|
||||
CALL print_clock( 'rotHSw:ev:b0' ) ;
|
||||
CALL print_clock( 'rotHSw:ev:b3' ) ; call print_clock('rotHSw:ev:bc')
|
||||
CALL print_clock( 'rotHSw:ev:s5' ) ;
|
||||
CALL print_clock( 'rotHSw:ev:b4' ) ; call print_clock('rotHSw:ev:comp')
|
||||
CALL print_clock( 'rotHSw:ev:s6' ) ;
|
||||
CALL print_clock( 'rotHSw:ev:b5' ) ; call print_clock('rotHSw:ev:sum')
|
||||
CALL print_clock( 'rotHSw:ev:s7' ) ; CALL print_clock('rotHSw:ev:b6' )
|
||||
! END IF
|
||||
END IF
|
||||
!
|
||||
CALL print_clock( 'h_psi' )
|
||||
|
@ -130,37 +187,6 @@ SUBROUTINE print_clock_pw()
|
|||
CALL print_clock ( 'fwfft_orbital' )
|
||||
CALL print_clock ( 'v_loc_psir' )
|
||||
ENDIF
|
||||
IF ( gamma_only ) THEN
|
||||
CALL print_clock( 'rdiaghg' )
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'regterg:overlap' )
|
||||
CALL print_clock( 'regterg:update' )
|
||||
CALL print_clock( 'regterg:last' )
|
||||
CALL print_clock( 'rdiaghg:choldc' )
|
||||
CALL print_clock( 'rdiaghg:inversion' )
|
||||
CALL print_clock( 'rdiaghg:paragemm' )
|
||||
ENDIF
|
||||
ELSE
|
||||
CALL print_clock( 'cdiaghg' )
|
||||
IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'cegterg:overlap' )
|
||||
CALL print_clock( 'cegterg:update' )
|
||||
CALL print_clock( 'cegterg:last' )
|
||||
CALL print_clock( 'cdiaghg:choldc' )
|
||||
CALL print_clock( 'cdiaghg:inversion' )
|
||||
CALL print_clock( 'cdiaghg:paragemm' )
|
||||
END IF
|
||||
END IF
|
||||
IF ( isolve == 2 ) THEN
|
||||
! IF ( iverbosity > 0 ) THEN
|
||||
CALL print_clock( 'ppcg:zgemm' ) ; CALL print_clock( 'ppcg:dgemm' )
|
||||
CALL print_clock( 'ppcg:hpsi' )
|
||||
CALL print_clock( 'ppcg:cholQR' )
|
||||
CALL print_clock( 'ppcg:RR' )
|
||||
CALL print_clock( 'ppcg:ZTRSM' ) ; CALL print_clock( 'ppcg:DTRSM' )
|
||||
CALL print_clock( 'ppcg:lock' )
|
||||
! END IF
|
||||
END IF
|
||||
!
|
||||
WRITE( stdout, '(/5x,"Called by h_psi:")' )
|
||||
CALL print_clock( 'h_psi:calbec' )
|
||||
|
|
|
@ -78,7 +78,9 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
! ... calculates weights of Kohn-Sham orbitals used in calculation of rho
|
||||
!
|
||||
CALL start_clock( 'sum_band:weights' )
|
||||
CALL weights ( )
|
||||
CALL stop_clock( 'sum_band:weights' )
|
||||
!
|
||||
IF (one_atom_occupations) CALL new_evc()
|
||||
!
|
||||
|
@ -122,6 +124,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
eband = 0.D0
|
||||
!
|
||||
CALL start_clock( 'sum_band:loop' )
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
CALL sum_band_gamma()
|
||||
|
@ -131,6 +134,7 @@ SUBROUTINE sum_band()
|
|||
CALL sum_band_k()
|
||||
!
|
||||
END IF
|
||||
CALL stop_clock( 'sum_band:loop' )
|
||||
CALL mp_sum( eband, inter_pool_comm )
|
||||
CALL mp_sum( eband, inter_bgrp_comm )
|
||||
!
|
||||
|
@ -185,6 +189,7 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
! ... symmetrize rho(G)
|
||||
!
|
||||
CALL start_clock( 'sum_band:sym_rho' )
|
||||
CALL sym_rho ( nspin_mag, rho%of_g )
|
||||
!
|
||||
! ... synchronize rho%of_r to the calculated rho%of_g (use psic as work array)
|
||||
|
@ -222,6 +227,7 @@ SUBROUTINE sum_band()
|
|||
END DO
|
||||
!
|
||||
END IF
|
||||
CALL stop_clock( 'sum_band:sym_rho' )
|
||||
!
|
||||
! ... if LSDA rho%of_r and rho%of_g are converted from (up,dw) to
|
||||
! ... (up+dw,up-dw) format.
|
||||
|
@ -286,13 +292,21 @@ SUBROUTINE sum_band()
|
|||
!
|
||||
npw = ngk(ik)
|
||||
!
|
||||
CALL start_clock( 'sum_band:buffer' )
|
||||
IF ( nks > 1 ) &
|
||||
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
|
||||
|
||||
IF ( nks > 1 ) CALL using_evc(1) ! get_buffer(evc, ...) evc is updated (intent out)
|
||||
|
||||
CALL stop_clock( 'sum_band:buffer' )
|
||||
!
|
||||
CALL start_clock( 'sum_band:init_us_2' )
|
||||
!
|
||||
IF ( nkb > 0 ) CALL using_vkb(1)
|
||||
!
|
||||
IF ( nkb > 0 ) &
|
||||
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
|
||||
CALL stop_clock( 'sum_band:init_us_2' )
|
||||
!
|
||||
! ... here we compute the band energy: the sum of the eigenvalues
|
||||
!
|
||||
|
@ -548,13 +562,20 @@ SUBROUTINE sum_band()
|
|||
IF ( lsda ) current_spin = isk(ik)
|
||||
npw = ngk (ik)
|
||||
!
|
||||
CALL start_clock( 'sum_band:buffer' )
|
||||
IF ( nks > 1 ) &
|
||||
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
|
||||
IF ( nks > 1 ) CALL using_evc(1)
|
||||
|
||||
CALL stop_clock( 'sum_band:buffer' )
|
||||
!
|
||||
CALL start_clock( 'sum_band:init_us_2' )
|
||||
!
|
||||
IF ( nkb > 0 ) CALL using_vkb(1)
|
||||
|
||||
IF ( nkb > 0 ) &
|
||||
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
|
||||
CALL stop_clock( 'sum_band:init_us_2' )
|
||||
!
|
||||
! ... here we compute the band energy: the sum of the eigenvalues
|
||||
!
|
||||
|
@ -926,6 +947,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
|
|||
CALL using_indv_ijkb0(0)
|
||||
CALL using_becp_auto(2)
|
||||
!
|
||||
CALL start_clock( 'sum_band:calbec' )
|
||||
npw = ngk(ik)
|
||||
IF ( .NOT. real_space ) THEN
|
||||
! calbec computes becp = <vkb_i|psi_j>
|
||||
|
@ -947,6 +969,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
|
|||
call mp_sum(becp%k,inter_bgrp_comm)
|
||||
endif
|
||||
ENDIF
|
||||
CALL stop_clock( 'sum_band:calbec' )
|
||||
!
|
||||
! In the EXX case with ultrasoft or PAW, a copy of becp will be
|
||||
! saved in a global variable to be rotated later
|
||||
|
|
|
@ -84,7 +84,9 @@ SUBROUTINE sum_band_gpu()
|
|||
!
|
||||
! ... calculates weights of Kohn-Sham orbitals used in calculation of rho
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:weights' )
|
||||
CALL weights ( )
|
||||
CALL stop_clock_gpu( 'sum_band:weights' )
|
||||
!
|
||||
IF (one_atom_occupations) CALL new_evc()
|
||||
!
|
||||
|
@ -128,6 +130,7 @@ SUBROUTINE sum_band_gpu()
|
|||
!
|
||||
eband = 0.D0
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:loop' )
|
||||
IF ( gamma_only ) THEN
|
||||
!
|
||||
CALL sum_band_gamma_gpu()
|
||||
|
@ -137,6 +140,7 @@ SUBROUTINE sum_band_gpu()
|
|||
CALL sum_band_k_gpu()
|
||||
!
|
||||
END IF
|
||||
CALL stop_clock_gpu( 'sum_band:loop' )
|
||||
CALL mp_sum( eband, inter_pool_comm )
|
||||
CALL mp_sum( eband, inter_bgrp_comm )
|
||||
!
|
||||
|
@ -193,6 +197,7 @@ SUBROUTINE sum_band_gpu()
|
|||
!
|
||||
! ... symmetrize rho(G)
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:sym_rho' )
|
||||
CALL sym_rho ( nspin_mag, rho%of_g )
|
||||
!
|
||||
! ... synchronize rho%of_r to the calculated rho%of_g (use psic as work array)
|
||||
|
@ -230,6 +235,7 @@ SUBROUTINE sum_band_gpu()
|
|||
END DO
|
||||
!
|
||||
END IF
|
||||
CALL stop_clock_gpu( 'sum_band:sym_rho' )
|
||||
!
|
||||
! ... if LSDA rho%of_r and rho%of_g are converted from (up,dw) to
|
||||
! ... (up+dw,up-dw) format.
|
||||
|
@ -247,8 +253,7 @@ SUBROUTINE sum_band_gpu()
|
|||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE sum_band_gamma_gpu()
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! ... gamma version
|
||||
!! \(\texttt{sum_band}\) - part for gamma version.
|
||||
!
|
||||
USE becmod, ONLY : becp
|
||||
USE mp_bands, ONLY : me_bgrp
|
||||
|
@ -309,13 +314,19 @@ SUBROUTINE sum_band_gpu()
|
|||
!
|
||||
npw = ngk(ik)
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:buffer' )
|
||||
IF ( nks > 1 ) &
|
||||
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
|
||||
IF ( nks > 1 ) CALL using_evc(2) ! get_buffer(evc, ...) evc is updated (intent out)
|
||||
IF ( nks > 1 ) CALL using_evc_d(0) ! sync on the GPU
|
||||
!
|
||||
CALL stop_clock_gpu( 'sum_band:buffer' )
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:init_us_2' )
|
||||
|
||||
IF ( nkb > 0 ) CALL using_vkb_d(2)
|
||||
IF ( nkb > 0 ) CALL init_us_2_gpu( npw, igk_k_d(1,ik), xk(1,ik), vkb_d )
|
||||
CALL stop_clock_gpu( 'sum_band:init_us_2' )
|
||||
!
|
||||
! ... here we compute the band energy: the sum of the eigenvalues
|
||||
!
|
||||
|
@ -523,8 +534,7 @@ SUBROUTINE sum_band_gpu()
|
|||
!-----------------------------------------------------------------------
|
||||
SUBROUTINE sum_band_k_gpu()
|
||||
!-----------------------------------------------------------------------
|
||||
!
|
||||
! ... k-points version
|
||||
!! \(\texttt{sum_band}\) - part for k-points version
|
||||
!
|
||||
USE wavefunctions_gpum, ONLY : psic_nc_d
|
||||
USE mp_bands, ONLY : me_bgrp
|
||||
|
@ -611,14 +621,18 @@ SUBROUTINE sum_band_gpu()
|
|||
IF ( lsda ) current_spin = isk(ik)
|
||||
npw = ngk (ik)
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:buffer' )
|
||||
IF ( nks > 1 ) &
|
||||
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
|
||||
IF ( nks > 1 ) CALL using_evc(2)
|
||||
IF ( nks > 1 ) CALL using_evc_d(0) ! sync evc on GPU, OPTIMIZE (use async here)
|
||||
CALL stop_clock_gpu( 'sum_band:buffer' )
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:init_us_2' )
|
||||
IF ( nkb > 0 ) CALL using_vkb_d(2)
|
||||
IF ( nkb > 0 ) &
|
||||
CALL init_us_2_gpu( npw, igk_k_d(1,ik), xk(1,ik), vkb_d )
|
||||
CALL stop_clock_gpu( 'sum_band:init_us_2' )
|
||||
!
|
||||
! ... here we compute the band energy: the sum of the eigenvalues
|
||||
!
|
||||
|
@ -996,13 +1010,15 @@ END SUBROUTINE sum_band_gpu
|
|||
SUBROUTINE sum_bec_gpu ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! This routine computes the sum over bands
|
||||
! \sum_i <\psi_i|\beta_l>w_i<\beta_m|\psi_i>
|
||||
! for point "ik" and, for LSDA, spin "current_spin"
|
||||
! Calls calbec to compute "becp"=<beta_m|psi_i>
|
||||
! Output is accumulated (unsymmetrized) into "becsum", module "uspp"
|
||||
!! This routine computes the sum over bands:
|
||||
!
|
||||
! Routine used in sum_band (if okvan) and in compute_becsum, called by hinit1 (if okpaw)
|
||||
!! \[ \sum_i \langle\psi_i|\beta_l\rangle w_i \langle\beta_m|\psi_i\rangle \]
|
||||
!
|
||||
!! for point "ik" and, for LSDA, spin "current_spin".
|
||||
!! Calls calbec to compute \(\text{"becp"}=\langle \beta_m|\psi_i \rangle\).
|
||||
!! Output is accumulated (unsymmetrized) into "becsum", module "uspp".
|
||||
!
|
||||
!! Routine used in sum_band (if okvan) and in compute_becsum, called by hinit1 (if okpaw).
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
|
@ -1064,6 +1080,7 @@ SUBROUTINE sum_bec_gpu ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd
|
|||
CALL using_becsum_d(1)
|
||||
IF (tqr) CALL using_ebecsum_d(1)
|
||||
!
|
||||
CALL start_clock_gpu( 'sum_band:calbec' )
|
||||
npw = ngk(ik)
|
||||
IF ( .NOT. real_space ) THEN
|
||||
CALL using_evc_d(0); CALL using_vkb_d(0); CALL using_becp_d_auto(2)
|
||||
|
@ -1087,6 +1104,7 @@ SUBROUTINE sum_bec_gpu ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd
|
|||
call mp_sum(becp%k,inter_bgrp_comm)
|
||||
endif
|
||||
ENDIF
|
||||
CALL stop_clock_gpu( 'sum_band:calbec' )
|
||||
!
|
||||
! In the EXX case with ultrasoft or PAW, a copy of becp will be
|
||||
! saved in a global variable to be rotated later
|
||||
|
@ -1277,10 +1295,9 @@ END SUBROUTINE sum_bec_gpu
|
|||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE add_becsum_nc_gpu ( na, np, becsum_nc_d, becsum_d )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! This routine multiplies becsum_nc by the identity and the Pauli matrices,
|
||||
! saves it in becsum for the calculation of augmentation charge and
|
||||
! magnetization.
|
||||
!! This routine multiplies \(\text{becsum_nc}\) by the identity and the
|
||||
!! Pauli matrices, saves it in \(\text{becsum}\) for the calculation of
|
||||
!! augmentation charge and magnetization.
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
|
@ -1340,10 +1357,9 @@ END SUBROUTINE add_becsum_nc_gpu
|
|||
!----------------------------------------------------------------------------
|
||||
SUBROUTINE add_becsum_so_gpu( na, np, becsum_nc_d, becsum_d )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
! This routine multiplies becsum_nc by the identity and the Pauli matrices,
|
||||
! rotates it as appropriate for the spin-orbit case, saves it in becsum
|
||||
! for the calculation of augmentation charge and magnetization.
|
||||
!! This routine multiplies \(\text{becsum_nc}\) by the identity and the Pauli
|
||||
!! matrices, rotates it as appropriate for the spin-orbit case, saves it in
|
||||
!! \(\text{becsum}\) for the calculation of augmentation charge and magnetization.
|
||||
!
|
||||
#if defined(__CUDA)
|
||||
USE cudafor
|
||||
|
|
|
@ -92,9 +92,10 @@ SUBROUTINE init_clocks( go )
|
|||
! ... go = .FALSE. : only clock #1 will run
|
||||
!
|
||||
USE util_param, ONLY : DP, stdout
|
||||
USE mytime, ONLY : called, gpu_called, t0cpu, cputime, no, notrunning, maxclock, &
|
||||
clock_label, walltime, t0wall, gputime, nclock, mpi_per_thread
|
||||
USE mytime, ONLY : gpu_starts, gpu_stops
|
||||
USE mytime, ONLY : called, t0cpu, cputime, no, notrunning, maxclock, &
|
||||
clock_label, walltime, t0wall, nclock, mpi_per_thread
|
||||
! ... GPU-related timers
|
||||
USE mytime, ONLY : gpu_starts, gpu_stops, gpu_called, gputime
|
||||
#if defined (__TRACE)
|
||||
USE mytime, ONLY : mpime, max_print_depth, MPI_COMM_WORLD
|
||||
#endif
|
||||
|
@ -373,7 +374,7 @@ SUBROUTINE stop_clock( label )
|
|||
RETURN
|
||||
!
|
||||
END SUBROUTINE stop_clock
|
||||
|
||||
!
|
||||
SUBROUTINE stop_clock_gpu( label )
|
||||
!----------------------------------------------------------------------------
|
||||
!
|
||||
|
@ -405,6 +406,10 @@ SUBROUTINE stop_clock_gpu( label )
|
|||
!
|
||||
IF ( no ) RETURN
|
||||
!
|
||||
! ... initialize the time measured through the CUDA APIs (relevant only if __CUDA is defined).
|
||||
!
|
||||
time = 0.0
|
||||
!
|
||||
! ... prevent trouble if label is longer than 12 characters
|
||||
!
|
||||
label_ = trim ( label )
|
||||
|
@ -457,7 +462,7 @@ SUBROUTINE print_clock( label )
|
|||
!----------------------------------------------------------------------------
|
||||
!
|
||||
USE util_param, ONLY : stdout
|
||||
USE mytime, ONLY : nclock, clock_label
|
||||
USE mytime, ONLY : nclock, clock_label, gpu_called
|
||||
!
|
||||
IMPLICIT NONE
|
||||
!
|
||||
|
@ -465,6 +470,9 @@ SUBROUTINE print_clock( label )
|
|||
!
|
||||
CHARACTER(len=12) :: label_
|
||||
INTEGER :: n
|
||||
LOGICAL :: print_gpu
|
||||
!
|
||||
print_gpu = ANY(gpu_called > 0)
|
||||
!
|
||||
IF ( label == ' ' ) THEN
|
||||
!
|
||||
|
@ -473,7 +481,7 @@ SUBROUTINE print_clock( label )
|
|||
DO n = 1, nclock
|
||||
!
|
||||
CALL print_this_clock( n )
|
||||
CALL print_this_clock_gpu( n )
|
||||
IF(print_gpu) CALL print_this_clock_gpu( n )
|
||||
!
|
||||
ENDDO
|
||||
!
|
||||
|
@ -488,7 +496,7 @@ SUBROUTINE print_clock( label )
|
|||
IF ( clock_label(n) == label_ ) THEN
|
||||
!
|
||||
CALL print_this_clock( n )
|
||||
CALL print_this_clock_gpu( n )
|
||||
IF(print_gpu) CALL print_this_clock_gpu( n )
|
||||
!
|
||||
exit
|
||||
!
|
||||
|
|
|
@ -56,7 +56,7 @@ inputs_args = ('c.scf.in', '1'), ('c.phG.in', '2'), ('ni.scf.in', '1'), ('ni.phX
|
|||
|
||||
[ph_metal/]
|
||||
program = PH
|
||||
inputs_args = ('al.scf.fit.in', '1'), ('al.scf.in', '1'), ('al.elph.in', '2'), ('q2r.in', '3'), ('matdyn.in.freq', '4'), ('matdyn.in.dos', '4'), ('lambda.in', '5')
|
||||
inputs_args = ('al.scf.fit.in', '1'), ('al.scf.in', '1'), ('al.elph.in', '2'), ('al.elph.notrans.in', '2'), ('q2r.in', '3'), ('matdyn.in.freq', '4'), ('matdyn.in.dos', '4'), ('lambda.in', '5')
|
||||
|
||||
[ph_U_metal_us/]
|
||||
program = PH
|
||||
|
|
|
@ -13,7 +13,7 @@ SUBROUTINE elphon()
|
|||
! Electron-phonon calculation from data saved in fildvscf
|
||||
!
|
||||
USE kinds, ONLY : DP
|
||||
USE constants, ONLY : amu_ry
|
||||
USE constants, ONLY : amu_ry, RY_TO_THZ, RY_TO_CMM1
|
||||
USE cell_base, ONLY : celldm, omega, ibrav, at, bg
|
||||
USE ions_base, ONLY : nat, ntyp => nsp, ityp, tau, amass
|
||||
USE gvecs, ONLY: doublegrid
|
||||
|
@ -51,7 +51,7 @@ SUBROUTINE elphon()
|
|||
COMPLEX(DP), allocatable :: phip (:, :, :, :)
|
||||
|
||||
INTEGER :: ntyp_, nat_, ibrav_, nspin_mag_, mu, nu, na, nb, nta, ntb, nqs_
|
||||
REAL(DP) :: celldm_(6)
|
||||
REAL(DP) :: celldm_(6), w1
|
||||
CHARACTER(LEN=3) :: atm(ntyp)
|
||||
|
||||
CALL start_clock ('elphon')
|
||||
|
@ -166,9 +166,27 @@ SUBROUTINE elphon()
|
|||
|
||||
deallocate( phip )
|
||||
ENDIF
|
||||
ENDIF
|
||||
!
|
||||
! Write phonon frequencies to stdout (in THz and cm-1; the sign is negative when w2 < 0)
|
||||
!
|
||||
WRITE( stdout, 8000) (xq (i), i = 1, 3)
|
||||
!
|
||||
DO nu = 1, 3 * nat
|
||||
w1 = SQRT( ABS( w2(nu) ) )
|
||||
if (w2(nu) < 0.d0) w1 = - w1
|
||||
WRITE( stdout, 8010) nu, w1 * RY_TO_THZ, w1 * RY_TO_CMM1
|
||||
ENDDO
|
||||
!
|
||||
WRITE( stdout, '(1x,74("*"))')
|
||||
!
|
||||
ENDIF ! .NOT. trans
|
||||
!
|
||||
CALL stop_clock ('elphon')
|
||||
!
|
||||
8000 FORMAT(/,5x,'Diagonalizing the dynamical matrix', &
|
||||
& //,5x,'q = ( ',3f14.9,' ) ',//,1x,74('*'))
|
||||
8010 FORMAT (5x,'freq (',i5,') =',f15.6,' [THz] =',f15.6,' [cm-1]')
|
||||
!
|
||||
RETURN
|
||||
END SUBROUTINE elphon
|
||||
!
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
Hubbard_U(1) = 2.0
|
||||
/
|
||||
&electrons
|
||||
startingwfc = 'atomic'
|
||||
conv_thr = 1.d-14
|
||||
mixing_beta = 0.3
|
||||
/
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
Electron-phonon coefficients for Al
|
||||
&inputph
|
||||
tr2_ph=1.0d-10,
|
||||
prefix='aluminum',
|
||||
fildvscf='aldv',
|
||||
amass(1)=26.98,
|
||||
outdir='./',
|
||||
fildyn='al.dyn',
|
||||
electron_phonon='interpolated',
|
||||
el_ph_sigma=0.005,
|
||||
el_ph_nsigma=10,
|
||||
trans=.false.,
|
||||
ldisp=.true.
|
||||
nq1=4, nq2=4, nq3=4
|
||||
/
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue