Merge commit '3c87bac5e67b5e30b9c5d7e7d3a69f9fb4285e1b' into gpu-develop

This commit is contained in:
Pietro Bonfa 2020-04-11 19:05:36 +02:00
commit 56c3090769
21 changed files with 8923 additions and 258 deletions

View File

@ -170,11 +170,6 @@ MODULE fft_types
INTEGER, ALLOCATABLE :: srh(:,:) ! Isend/recv handles by subbatch
#endif
COMPLEX(DP), ALLOCATABLE, DIMENSION(:) :: aux
#if defined(_OPENMP)
INTEGER, ALLOCATABLE :: comm2s(:) ! multiple communicator for the fft group along the second direction
INTEGER, ALLOCATABLE :: comm3s(:) ! multiple communicator for the fft group along the third direction
#endif
END TYPE
REAL(DP) :: fft_dual = 4.0d0
@ -245,14 +240,6 @@ CONTAINS
CALL MPI_COMM_SPLIT( comm, color, key, desc%comm3, ierr )
CALL MPI_COMM_RANK( desc%comm3, desc%mype3, ierr )
CALL MPI_COMM_SIZE( desc%comm3, desc%nproc3, ierr )
#if defined(_OPENMP)
ALLOCATE( desc%comm2s( OMP_GET_MAX_THREADS() ))
ALLOCATE( desc%comm3s( OMP_GET_MAX_THREADS() ))
DO i=1, OMP_GET_MAX_THREADS()
CALL MPI_COMM_DUP(desc%comm2, desc%comm2s(i), ierr)
CALL MPI_COMM_DUP(desc%comm3, desc%comm3s(i), ierr)
ENDDO
#endif
#else
desc%comm2 = desc%comm ; desc%mype2 = desc%mype ; desc%nproc2 = desc%nproc
desc%comm3 = desc%comm ; desc%mype3 = desc%mype ; desc%nproc3 = desc%nproc
@ -446,14 +433,6 @@ CONTAINS
#if defined(__MPI)
IF (desc%comm2 /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2, ierr )
IF (desc%comm3 /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3, ierr )
#if defined(_OPENMP)
DO i=1, SIZE(desc%comm2s)
IF (desc%comm2s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2s(i), ierr )
IF (desc%comm3s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3s(i), ierr )
ENDDO
DEALLOCATE( desc%comm2s )
DEALLOCATE( desc%comm3s )
#endif
#else
desc%comm2 = MPI_COMM_NULL
desc%comm3 = MPI_COMM_NULL
@ -493,24 +472,6 @@ CONTAINS
INTEGER :: ierr
!write (6,*) ' inside fft_type_set' ; FLUSH(6)
!
#if defined(__MPI)
#if defined(_OPENMP)
IF (nmany > OMP_GET_MAX_THREADS()) THEN
DO i=1, SIZE(desc%comm2s)
IF (desc%comm2s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm2s(i), ierr )
IF (desc%comm3s(i) /= MPI_COMM_NULL) CALL MPI_COMM_FREE( desc%comm3s(i), ierr )
ENDDO
DEALLOCATE( desc%comm2s )
DEALLOCATE( desc%comm3s )
ALLOCATE( desc%comm2s( nmany ))
ALLOCATE( desc%comm3s( nmany ))
DO i=1, nmany
CALL MPI_COMM_DUP(desc%comm2, desc%comm2s(i), ierr)
CALL MPI_COMM_DUP(desc%comm3, desc%comm3s(i), ierr)
ENDDO
ENDIF
#endif
#endif
!
IF (.NOT. ALLOCATED( desc%nsp ) ) &
CALL fftx_error__(' fft_type_set ', ' fft arrays not yet allocated ', 1 )

View File

@ -61,7 +61,7 @@ SUBROUTINE hp_postproc
eps3 = 1.d-4 ! the same threshold for the comparison of distances
! as in PW/src/inter_V.f90 DFT+U+V
!
CHARACTER(len=50) :: filenameU
CHARACTER(len=256) :: filenameU
INTEGER, EXTERNAL :: find_free_unit
!
CALL start_clock('hp_calc_U')

View File

@ -13,7 +13,7 @@ SUBROUTINE elphon()
! Electron-phonon calculation from data saved in fildvscf
!
USE kinds, ONLY : DP
USE constants, ONLY : amu_ry
USE constants, ONLY : amu_ry, RY_TO_THZ, RY_TO_CMM1
USE cell_base, ONLY : celldm, omega, ibrav, at, bg
USE ions_base, ONLY : nat, ntyp => nsp, ityp, tau, amass
USE gvecs, ONLY: doublegrid
@ -50,7 +50,7 @@ SUBROUTINE elphon()
COMPLEX(DP), allocatable :: phip (:, :, :, :)
INTEGER :: ntyp_, nat_, ibrav_, nspin_mag_, mu, nu, na, nb, nta, ntb, nqs_
REAL(DP) :: celldm_(6)
REAL(DP) :: celldm_(6), w1
CHARACTER(LEN=3) :: atm(ntyp)
CALL start_clock ('elphon')
@ -165,9 +165,27 @@ SUBROUTINE elphon()
deallocate( phip )
ENDIF
ENDIF
!
! Write phonon frequency to stdout
!
WRITE( stdout, 8000) (xq (i), i = 1, 3)
!
DO nu = 1, 3 * nat
w1 = SQRT( ABS( w2(nu) ) )
if (w2(nu) < 0.d0) w1 = - w1
WRITE( stdout, 8010) nu, w1 * RY_TO_THZ, w1 * RY_TO_CMM1
ENDDO
!
WRITE( stdout, '(1x,74("*"))')
!
ENDIF ! .NOT. trans
!
CALL stop_clock ('elphon')
!
8000 FORMAT(/,5x,'Diagonalizing the dynamical matrix', &
& //,5x,'q = ( ',3f14.9,' ) ',//,1x,74('*'))
8010 FORMAT (5x,'freq (',i5,') =',f15.6,' [THz] =',f15.6,' [cm-1]')
!
RETURN
END SUBROUTINE elphon
!

View File

@ -83,16 +83,17 @@ SUBROUTINE openfilq()
ELSE
! this is the standard treatment
IF (lgamma.AND.modenum==0.AND..NOT.newgrid ) tmp_dir=tmp_dir_save
IF ((noncolin.AND.domag).OR.lsda) tmp_dir=tmp_dir_phq
! FIXME: why this case?
IF ( noncolin.AND.domag ) tmp_dir=tmp_dir_phq
ENDIF
!!!!!!!!!!!!!!!!!!!!!!!! END OF ACFDT TEST !!!!!!!!!!!!!!!!
iuwfc = 20
lrwfc = nbnd * npwx * npol
CALL open_buffer (iuwfc, 'wfc', lrwfc, io_level, exst_mem, exst, tmp_dir)
IF (.NOT.exst.AND..NOT.exst_mem.and..not.all_done) THEN
tmp_dir = tmp_dir_phq
!FIXME Dirty fix for obscure case, likely obsolete?
CALL close_buffer(iuwfc, 'delete')
!FIXME Dirty fix for obscure case
tmp_dir = tmp_dir_phq
CALL open_buffer (iuwfc, 'wfc', lrwfc, io_level, exst_mem, exst, tmp_dir)
IF (.NOT.exst.AND..NOT.exst_mem) CALL errore ('openfilq', 'file '//trim(prefix)//'.wfc not found', 1)
END IF

View File

@ -89,8 +89,8 @@ example05:
tensor for AlAs.
example06:
This example shows how to use ph.x to calculate
the phonon frequencies at Gamma and X of fcc-Pt.
This example shows how to use ph.x to calculate the phonon frequencies
at Gamma and X and the dispersion for fcc-Pt with spin-orbit interactions.
example07:
This example tests pw.x and ph.x in several cases that require the
@ -120,7 +120,7 @@ example12:
modes of a molecule (SiH4) at Gamma.
example13:
Deleted
Full dispersions for spin-polarized phonons (Ni)
example14:
This example shows how to use ph.x to calculate the phonon frequencies

View File

@ -10,3 +10,5 @@ The calculation proceeds as follows:
output=pt.ph.out).
3) make a phonon calculation at X (input=pt.phX.in, output=pt.phX.out).
4) make a phonon dispersion calculation (input=pt.ph.in, output=pt.ph.out).

File diff suppressed because it is too large Load Diff

View File

@ -157,5 +157,22 @@ $PH_COMMAND < pt.phX.in > pt.phX.out
check_failure $?
$ECHO " done"
cat > pt.ph.in << EOF
phonon dispersions of Pt
&inputph
amass(1)=195.078,
prefix='platinum',
outdir='$TMP_DIR'
fildyn='ptdyn',
tr2_ph=1.0d-16,
ldisp=.true., nq1=4,nq2=4,nq3=4
/
EOF
$ECHO " running the phonon dispersions calculation for Pt with spin-orbit coupling...\c"
$PH_COMMAND < pt.ph.in > pt.ph.out
check_failure $?
$ECHO " done"
$ECHO
$ECHO "$EXAMPLE_DIR: done"

View File

@ -29,6 +29,6 @@ The calculation proceeds as follows:
insulator with noncollinear magnetization.
(input=o2_nc.scf.in, output=o2_nc.scf.out)
8) make a self-consistent calculation for the O2 molecule treated as an
insulator with noncollinear magnetization.
8) make a phonon calculation at the Gamma point for the O2 molecule
treated as an insulator with noncollinear magnetization.
(input=o2_nc.phG.in, output=o2_nc.phG.out)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,461 @@
Program PWSCF v.6.5 starts on 19Mar2020 at 9:46: 9
This program is part of the open-source Quantum ESPRESSO suite
for quantum simulation of materials; please cite
"P. Giannozzi et al., J. Phys.:Condens. Matter 21 395502 (2009);
"P. Giannozzi et al., J. Phys.:Condens. Matter 29 465901 (2017);
URL http://www.quantum-espresso.org",
in publications or presentations arising from this work. More details at
http://www.quantum-espresso.org/quote
Parallel version (MPI & OpenMP), running on 4 processor cores
Number of MPI processes: 4
Threads/MPI process: 1
MPI processes distributed on 1 nodes
R & G space division: proc/nbgrp/npool/nimage = 4
Waiting for input...
Reading input from standard input
Current dimensions of program PWSCF are:
Max number of different atomic species (ntypx) = 10
Max number of k-points (npk) = 40000
Max angular momentum in pseudopotentials (lmaxx) = 3
file Ni.pbe-nd-rrkjus.UPF: wavefunction(s) 4S renormalized
Subspace diagonalization in iterative solution of the eigenvalue problem:
a serial algorithm will be used
Parallelization info
--------------------
sticks: dense smooth PW G-vecs: dense smooth PW
Min 112 40 15 1604 351 82
Max 113 41 16 1607 354 83
Sum 451 163 61 6423 1411 331
bravais-lattice index = 2
lattice parameter (alat) = 6.6500 a.u.
unit-cell volume = 73.5199 (a.u.)^3
number of atoms/cell = 1
number of atomic types = 1
number of electrons = 10.00
number of Kohn-Sham states= 9
kinetic-energy cutoff = 27.0000 Ry
charge density cutoff = 300.0000 Ry
convergence threshold = 1.0E-08
mixing beta = 0.7000
number of iterations used = 8 plain mixing
Exchange-correlation= SLA PW PBE PBE
( 1 4 3 4 0 0 0)
celldm(1)= 6.650000 celldm(2)= 0.000000 celldm(3)= 0.000000
celldm(4)= 0.000000 celldm(5)= 0.000000 celldm(6)= 0.000000
crystal axes: (cart. coord. in units of alat)
a(1) = ( -0.500000 0.000000 0.500000 )
a(2) = ( 0.000000 0.500000 0.500000 )
a(3) = ( -0.500000 0.500000 0.000000 )
reciprocal axes: (cart. coord. in units 2 pi/alat)
b(1) = ( -1.000000 -1.000000 1.000000 )
b(2) = ( 1.000000 1.000000 1.000000 )
b(3) = ( -1.000000 1.000000 -1.000000 )
PseudoPot. # 1 for Ni read from file:
/home/giannozz/q-e-mio/pseudo/Ni.pbe-nd-rrkjus.UPF
MD5 check sum: d71bc9c4c8adef96ad6fe9664ede368e
Pseudo is Ultrasoft + core correction, Zval = 10.0
Generated by new atomic code, or converted to UPF format
Using radial grid of 1203 points, 6 beta functions with:
l(1) = 0
l(2) = 0
l(3) = 1
l(4) = 1
l(5) = 2
l(6) = 2
Q(r) pseudized with 0 coefficients
atomic species valence mass pseudopotential
Ni 10.00 58.69340 Ni( 1.00)
Starting magnetic structure
atomic species magnetization
Ni 0.500
48 Sym. Ops., with inversion, found
Cartesian axes
site n. atom positions (alat units)
1 Ni tau( 1) = ( 0.0000000 0.0000000 0.0000000 )
number of k points= 10 Marzari-Vanderbilt smearing, width (Ry)= 0.0200
cart. coord. in units 2pi/alat
k( 1) = ( -0.1250000 0.1250000 0.1250000), wk = 0.0312500
k( 2) = ( -0.3750000 0.3750000 -0.1250000), wk = 0.0937500
k( 3) = ( 0.3750000 -0.3750000 0.6250000), wk = 0.0937500
k( 4) = ( 0.1250000 -0.1250000 0.3750000), wk = 0.0937500
k( 5) = ( -0.1250000 0.6250000 0.1250000), wk = 0.0937500
k( 6) = ( 0.6250000 -0.1250000 0.8750000), wk = 0.1875000
k( 7) = ( 0.3750000 0.1250000 0.6250000), wk = 0.1875000
k( 8) = ( -0.1250000 -0.8750000 0.1250000), wk = 0.0937500
k( 9) = ( -0.3750000 0.3750000 0.3750000), wk = 0.0312500
k( 10) = ( 0.3750000 -0.3750000 1.1250000), wk = 0.0937500
Dense grid: 6423 G-vectors FFT dimensions: ( 25, 25, 25)
Smooth grid: 1411 G-vectors FFT dimensions: ( 15, 15, 15)
Estimated max dynamical RAM per process > 7.84 MB
Estimated total dynamical RAM > 31.37 MB
Generating pointlists ...
new r_m : 0.2917 (alat units) 1.9397 (a.u.) for type 1
Check: negative core charge= -0.000021
Initial potential from superposition of free atoms
starting charge 9.99954, renormalised to 10.00000
Starting wfcs are 6 randomized atomic wfcs + 3 random wfcs
total cpu time spent up to now is 0.4 secs
Self-consistent Calculation
iteration # 1 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 1.00E-02, avg # of iterations = 4.2
total cpu time spent up to now is 0.4 secs
total energy = -85.61582607 Ry
Harris-Foulkes estimate = -85.78369204 Ry
estimated scf accuracy < 0.60123762 Ry
total magnetization = 1.63 Bohr mag/cell
absolute magnetization = 1.65 Bohr mag/cell
iteration # 2 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 6.01E-03, avg # of iterations = 2.0
total cpu time spent up to now is 0.5 secs
total energy = -85.74795377 Ry
Harris-Foulkes estimate = -86.04555041 Ry
estimated scf accuracy < 0.81456890 Ry
total magnetization = 0.70 Bohr mag/cell
absolute magnetization = 0.75 Bohr mag/cell
iteration # 3 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 6.01E-03, avg # of iterations = 1.1
total cpu time spent up to now is 0.5 secs
total energy = -85.88902499 Ry
Harris-Foulkes estimate = -85.86964512 Ry
estimated scf accuracy < 0.02587232 Ry
total magnetization = 0.85 Bohr mag/cell
absolute magnetization = 1.00 Bohr mag/cell
iteration # 4 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 2.59E-04, avg # of iterations = 1.6
total cpu time spent up to now is 0.6 secs
total energy = -85.89672647 Ry
Harris-Foulkes estimate = -85.89651894 Ry
estimated scf accuracy < 0.00091973 Ry
total magnetization = 0.72 Bohr mag/cell
absolute magnetization = 0.84 Bohr mag/cell
iteration # 5 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 9.20E-06, avg # of iterations = 2.0
total cpu time spent up to now is 0.6 secs
total energy = -85.89693407 Ry
Harris-Foulkes estimate = -85.89693937 Ry
estimated scf accuracy < 0.00010875 Ry
total magnetization = 0.70 Bohr mag/cell
absolute magnetization = 0.82 Bohr mag/cell
iteration # 6 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 1.09E-06, avg # of iterations = 1.6
total cpu time spent up to now is 0.7 secs
total energy = -85.89698810 Ry
Harris-Foulkes estimate = -85.89696699 Ry
estimated scf accuracy < 0.00004828 Ry
total magnetization = 0.71 Bohr mag/cell
absolute magnetization = 0.81 Bohr mag/cell
iteration # 7 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 4.83E-07, avg # of iterations = 1.0
total cpu time spent up to now is 0.7 secs
total energy = -85.89698843 Ry
Harris-Foulkes estimate = -85.89698777 Ry
estimated scf accuracy < 0.00000129 Ry
total magnetization = 0.71 Bohr mag/cell
absolute magnetization = 0.81 Bohr mag/cell
iteration # 8 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 1.29E-08, avg # of iterations = 2.1
total cpu time spent up to now is 0.8 secs
total energy = -85.89698924 Ry
Harris-Foulkes estimate = -85.89698895 Ry
estimated scf accuracy < 0.00000064 Ry
total magnetization = 0.71 Bohr mag/cell
absolute magnetization = 0.80 Bohr mag/cell
iteration # 9 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 6.40E-09, avg # of iterations = 1.2
total cpu time spent up to now is 0.8 secs
total energy = -85.89698930 Ry
Harris-Foulkes estimate = -85.89698920 Ry
estimated scf accuracy < 0.00000020 Ry
total magnetization = 0.71 Bohr mag/cell
absolute magnetization = 0.80 Bohr mag/cell
iteration # 10 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 2.02E-09, avg # of iterations = 1.0
total cpu time spent up to now is 0.9 secs
total energy = -85.89698931 Ry
Harris-Foulkes estimate = -85.89698930 Ry
estimated scf accuracy < 0.00000002 Ry
total magnetization = 0.71 Bohr mag/cell
absolute magnetization = 0.80 Bohr mag/cell
iteration # 11 ecut= 27.00 Ry beta= 0.70
Davidson diagonalization with overlap
ethr = 2.13E-10, avg # of iterations = 1.0
Magnetic moment per site:
atom: 1 charge: 8.7379 magn: 0.7488 constr: 0.0000
total cpu time spent up to now is 0.9 secs
End of self-consistent calculation
------ SPIN UP ------------
k =-0.1250 0.1250 0.1250 ( 172 PWs) bands (ev):
5.8697 11.5737 11.8317 11.8317 12.8611 12.8611 35.2150 39.1169
41.0563
k =-0.3750 0.3750-0.1250 ( 171 PWs) bands (ev):
8.5750 11.2499 11.8341 12.1285 12.7519 13.6726 27.1041 32.6457
39.6758
k = 0.3750-0.3750 0.6250 ( 172 PWs) bands (ev):
9.6622 11.5165 11.9825 12.1969 13.5532 15.4846 20.4981 33.7467
36.0278
k = 0.1250-0.1250 0.3750 ( 169 PWs) bands (ev):
7.3630 11.1755 12.0271 12.1374 12.6933 13.1367 31.2697 36.2534
36.8261
k =-0.1250 0.6250 0.1250 ( 178 PWs) bands (ev):
9.3858 10.5801 12.0472 12.7102 13.4795 13.7865 28.1567 31.5072
32.3294
k = 0.6250-0.1250 0.8750 ( 179 PWs) bands (ev):
10.3858 10.6410 11.6236 12.9145 13.5150 19.0389 22.3264 26.0100
28.3109
k = 0.3750 0.1250 0.6250 ( 174 PWs) bands (ev):
10.0138 11.0551 11.4267 12.4905 13.2322 15.3089 24.0931 29.7561
32.8980
k =-0.1250-0.8750 0.1250 ( 176 PWs) bands (ev):
9.7772 10.1642 12.8691 13.3034 13.6221 16.7879 24.9788 26.3753
30.0885
k =-0.3750 0.3750 0.3750 ( 174 PWs) bands (ev):
9.0448 11.8253 11.8253 12.3362 13.3394 13.3394 23.0015 37.0668
39.2789
k = 0.3750-0.3750 1.1250 ( 176 PWs) bands (ev):
10.3652 11.0167 11.5576 12.5016 13.2684 17.7549 21.2363 27.2374
34.3326
------ SPIN DOWN ----------
k =-0.1250 0.1250 0.1250 ( 172 PWs) bands (ev):
5.8235 12.4452 12.7306 12.7306 13.5993 13.5993 35.2386 38.9839
41.0911
k =-0.3750 0.3750-0.1250 ( 171 PWs) bands (ev):
8.6208 11.9920 12.5953 12.9299 13.5958 14.4987 27.2785 32.7142
39.6077
k = 0.3750-0.3750 0.6250 ( 172 PWs) bands (ev):
10.1825 12.1397 12.7501 12.7926 14.4701 15.8906 20.9029 33.7520
36.0975
k = 0.1250-0.1250 0.3750 ( 169 PWs) bands (ev):
7.3328 11.9983 12.8358 13.0200 13.4874 13.9185 31.3755 36.3333
36.7643
k =-0.1250 0.6250 0.1250 ( 178 PWs) bands (ev):
9.5396 11.3428 12.7065 13.5760 14.3301 14.5163 28.2785 31.5780
32.3842
k = 0.6250-0.1250 0.8750 ( 179 PWs) bands (ev):
10.8818 11.3220 12.3443 13.6454 14.5133 19.3212 22.5349 26.1705
28.4085
k = 0.3750 0.1250 0.6250 ( 174 PWs) bands (ev):
10.3493 11.6766 12.1579 13.2575 14.1339 15.9186 24.3093 29.8491
32.9693
k =-0.1250-0.8750 0.1250 ( 176 PWs) bands (ev):
10.2090 10.8957 13.6527 14.1097 14.5846 17.0385 25.1835 26.4722
30.1022
k =-0.3750 0.3750 0.3750 ( 174 PWs) bands (ev):
9.3306 12.6014 12.6014 12.6765 14.2264 14.2264 23.2891 36.8996
39.3685
k = 0.3750-0.3750 1.1250 ( 176 PWs) bands (ev):
10.9698 11.5109 12.2799 13.2468 14.2186 18.1064 21.5401 27.3703
34.3960
the Fermi energy is 14.2874 ev
! total energy = -85.89698931 Ry
Harris-Foulkes estimate = -85.89698931 Ry
estimated scf accuracy < 1.3E-09 Ry
The total energy is the sum of the following terms:
one-electron contribution = -2.06436173 Ry
hartree contribution = 15.23370153 Ry
xc contribution = -30.12053187 Ry
ewald contribution = -68.94529435 Ry
smearing contrib. (-TS) = -0.00050289 Ry
total magnetization = 0.71 Bohr mag/cell
absolute magnetization = 0.80 Bohr mag/cell
convergence has been achieved in 11 iterations
Forces acting on atoms (cartesian axes, Ry/au):
atom 1 type 1 force = 0.00000000 0.00000000 0.00000000
Total force = 0.000000 Total SCF correction = 0.000000
Writing output data file /home/giannozz/q-e-mio/tempdir/nickel.save/
init_run : 0.09s CPU 0.09s WALL ( 1 calls)
electrons : 0.48s CPU 0.53s WALL ( 1 calls)
forces : 0.01s CPU 0.01s WALL ( 1 calls)
Called by init_run:
wfcinit : 0.01s CPU 0.01s WALL ( 1 calls)
potinit : 0.01s CPU 0.01s WALL ( 1 calls)
hinit0 : 0.06s CPU 0.06s WALL ( 1 calls)
Called by electrons:
c_bands : 0.27s CPU 0.30s WALL ( 11 calls)
sum_band : 0.13s CPU 0.13s WALL ( 11 calls)
v_of_rho : 0.05s CPU 0.05s WALL ( 12 calls)
newd : 0.03s CPU 0.04s WALL ( 12 calls)
mix_rho : 0.01s CPU 0.01s WALL ( 11 calls)
Called by c_bands:
init_us_2 : 0.01s CPU 0.01s WALL ( 480 calls)
cegterg : 0.25s CPU 0.28s WALL ( 220 calls)
Called by sum_band:
sum_band:bec : 0.00s CPU 0.00s WALL ( 220 calls)
addusdens : 0.08s CPU 0.08s WALL ( 11 calls)
Called by *egterg:
h_psi : 0.18s CPU 0.20s WALL ( 617 calls)
s_psi : 0.00s CPU 0.00s WALL ( 617 calls)
g_psi : 0.00s CPU 0.00s WALL ( 377 calls)
cdiaghg : 0.05s CPU 0.05s WALL ( 597 calls)
Called by h_psi:
h_psi:calbec : 0.01s CPU 0.01s WALL ( 617 calls)
vloc_psi : 0.16s CPU 0.18s WALL ( 617 calls)
add_vuspsi : 0.00s CPU 0.01s WALL ( 617 calls)
General routines
calbec : 0.01s CPU 0.01s WALL ( 917 calls)
fft : 0.03s CPU 0.03s WALL ( 313 calls)
ffts : 0.00s CPU 0.00s WALL ( 46 calls)
fftw : 0.18s CPU 0.20s WALL ( 11200 calls)
interpolate : 0.00s CPU 0.00s WALL ( 24 calls)
Parallel routines
fft_scatt_xy : 0.03s CPU 0.03s WALL ( 11559 calls)
fft_scatt_yz : 0.11s CPU 0.12s WALL ( 11559 calls)
PWSCF : 0.84s CPU 0.93s WALL
This run was terminated on: 9:46:10 19Mar2020
=------------------------------------------------------------------------------=
JOB DONE.
=------------------------------------------------------------------------------=

View File

@ -0,0 +1,139 @@
#!/bin/sh
#
# Example driver: phonon dispersions for spin-polarized fcc-Ni.
#
# Steps performed:
#   1. source ../../../environment_variables (relative to this script),
#   2. verify that the executables and the pseudopotential are available
#      (the pseudopotential is downloaded when it is not readable),
#   3. run an scf calculation with pw.x (ni.scf.in -> ni.scf.out),
#   4. run a phonon dispersion calculation with ph.x on a 4x4x4 q-point
#      grid (ni.ph.in -> ni.ph.out).
#
# All input and output files are written to the "results" directory next
# to this script.
#
# NOTE(review): BIN_DIR, PSEUDO_DIR, TMP_DIR, WGET, NETWORK_PSEUDO,
# PARA_PREFIX, PARA_POSTFIX and the check_failure function are not defined
# here; presumably they come from the sourced environment_variables file.

# run from directory where this script is
# (dirname yields "." when $0 contains no slash, e.g. "sh run_example")
cd "$(dirname "$0")" || exit 1
EXAMPLE_DIR=$(pwd)

# check whether ECHO has the -e option
if test "`echo -e`" = "-e" ; then ECHO=echo ; else ECHO="echo -e" ; fi

$ECHO
$ECHO "$EXAMPLE_DIR : starting"
$ECHO
$ECHO "This example shows how to use pw.x and ph.x to calculate phonon"
$ECHO "dispersions for spin-polarized fcc-Ni."

# set the needed environment variables
. ../../../environment_variables

# required executables and pseudopotentials
BIN_LIST="pw.x ph.x"
PSEUDO_LIST="Ni.pbe-nd-rrkjus.UPF"

$ECHO
$ECHO " executables directory: $BIN_DIR"
$ECHO " pseudo directory: $PSEUDO_DIR"
$ECHO " temporary directory: $TMP_DIR"
$ECHO
$ECHO " checking that needed directories and files exist...\c"

# check for directories that must already exist
for DIR in "$BIN_DIR" "$PSEUDO_DIR" ; do
    if test ! -d "$DIR" ; then
        $ECHO
        $ECHO "ERROR: $DIR not existent or not a directory"
        $ECHO "Aborting"
        exit 1
    fi
done

# create the working directories when missing (-p also creates parents)
for DIR in "$TMP_DIR" "$EXAMPLE_DIR/results" ; do
    if test ! -d "$DIR" ; then
        mkdir -p "$DIR"
    fi
done

# never continue in the wrong directory: files are written/removed below
cd "$EXAMPLE_DIR/results" || exit 1

# check for executables
for FILE in $BIN_LIST ; do
    if test ! -x "$BIN_DIR/$FILE" ; then
        $ECHO
        $ECHO "ERROR: $BIN_DIR/$FILE not existent or not executable"
        $ECHO "Aborting"
        exit 1
    fi
done

# check for pseudopotentials; try to download the ones that are not readable
for FILE in $PSEUDO_LIST ; do
    if test ! -r "$PSEUDO_DIR/$FILE" ; then
        $ECHO
        $ECHO "Downloading $FILE to $PSEUDO_DIR...\c"
        # $WGET is left unquoted on purpose: it holds a command plus options
        $WGET "$PSEUDO_DIR/$FILE" "$NETWORK_PSEUDO/$FILE" 2> /dev/null
        # test the download status right here instead of relying on the
        # exit status of the enclosing "if" compound command
        if test $? != 0; then
            $ECHO
            $ECHO "ERROR: $PSEUDO_DIR/$FILE not existent or not readable"
            $ECHO "Aborting"
            exit 1
        fi
    fi
done
$ECHO " done"

# how to run executables
# (expanded unquoted below so that prefix/postfix are split into words;
#  this means BIN_DIR itself must not contain whitespace)
PW_COMMAND="$PARA_PREFIX $BIN_DIR/pw.x $PARA_POSTFIX"
PH_COMMAND="$PARA_PREFIX $BIN_DIR/ph.x $PARA_POSTFIX"
$ECHO
$ECHO " running pw.x as: $PW_COMMAND"
$ECHO " running ph.x as: $PH_COMMAND"
$ECHO

# clean TMP_DIR (directory part quoted, glob left active)
$ECHO " cleaning $TMP_DIR...\c"
rm -rf "$TMP_DIR"/nickel*
rm -rf "$TMP_DIR"/_ph0/nickel*
$ECHO " done"

# self-consistent calculation for Ni with US-PP
cat > ni.scf.in << EOF
&control
calculation='scf'
restart_mode='from_scratch',
tprnfor = .true.
prefix='nickel',
pseudo_dir = '$PSEUDO_DIR/',
outdir='$TMP_DIR/'
/
&system
ibrav=2, celldm(1) =6.65, nat= 1, ntyp= 1,
nspin=2,
starting_magnetization(1)=0.5,
degauss=0.02,
smearing='mv',
occupations='smearing',
ecutwfc =27.0
ecutrho =300.0
/
&electrons
conv_thr = 1.0d-8
mixing_beta = 0.7
/
ATOMIC_SPECIES
Ni 58.6934 Ni.pbe-nd-rrkjus.UPF
ATOMIC_POSITIONS (alat)
Ni 0.00 0.00 0.00
K_POINTS AUTOMATIC
4 4 4 1 1 1
EOF
$ECHO " running the scf calculation for Ni...\c"
$PW_COMMAND < ni.scf.in > ni.scf.out
check_failure $?
$ECHO " done"

# phonon dispersion calculation
cat > ni.ph.in << EOF
phonons of Ni
&inputph
tr2_ph=1.0d-14,
prefix='nickel',
amass(1)=58.6934,
fildyn='niX.dyn',
outdir='$TMP_DIR/',
ldisp=.true., nq1=4,nq2=4,nq3=4
/
EOF
$ECHO " running the phonon dispersion calculation for Ni...\c"
$PH_COMMAND < ni.ph.in > ni.ph.out
check_failure $?
$ECHO " done"

$ECHO
$ECHO "$EXAMPLE_DIR: done"

View File

@ -79,39 +79,96 @@ SUBROUTINE print_clock_pw()
WRITE( stdout, '(/5x,"Called by c_bands:")' )
CALL print_clock( 'init_us_2' )
IF ( isolve == 0 ) THEN
IF ( gamma_only ) THEN
CALL print_clock( 'regterg' )
ELSE
CALL print_clock( 'cegterg' )
ENDIF
CALL print_clock( 'regterg' ) ; CALL print_clock( 'cegterg' )
ELSE IF (isolve == 1) THEN
IF ( gamma_only ) THEN
CALL print_clock( 'rcgdiagg' )
ELSE
CALL print_clock( 'ccgdiagg' )
ENDIF
CALL print_clock( 'rcgdiagg' ) ; CALL print_clock( 'ccgdiagg' )
CALL print_clock( 'wfcrot' )
ELSE IF (isolve == 2) THEN
IF ( gamma_only ) THEN
CALL print_clock( 'ppcg_gamma' )
ELSE
CALL print_clock( 'ppcg_k' )
ENDIF
CALL print_clock( 'ppcg_gamma' ) ; CALL print_clock( 'ppcg_k' )
CALL print_clock( 'wfcrot' )
ELSE IF (isolve == 3) THEN
CALL print_clock( 'paro_gamma' ) ; CALL print_clock( 'paro_k' )
ENDIF
!
!IF ( iverbosity > 0) THEN
WRITE( stdout, '(/5x,"Called by sum_band:")' )
CALL print_clock( 'sum_band:weights' )
CALL print_clock( 'sum_band:loop' )
CALL print_clock( 'sum_band:buffer' )
CALL print_clock( 'sum_band:init_us_2' )
CALL print_clock( 'sum_band:calbec' )
CALL print_clock( 'sum_band:becsum' )
CALL print_clock( 'addusdens' )
!ENDIF
!
IF ( isolve == 0 ) THEN
WRITE( stdout, '(/5x,"Called by *egterg:")' )
IF ( gamma_only ) THEN
CALL print_clock( 'rdiaghg' )
IF ( iverbosity > 0 ) THEN
CALL print_clock( 'regterg:overlap' )
CALL print_clock( 'regterg:update' )
CALL print_clock( 'regterg:last' )
CALL print_clock( 'rdiaghg:choldc' )
CALL print_clock( 'rdiaghg:inversion' )
CALL print_clock( 'rdiaghg:paragemm' )
ENDIF
ELSE
CALL print_clock( 'cdiaghg' )
IF ( iverbosity > 0 ) THEN
CALL print_clock( 'cegterg:overlap' )
CALL print_clock( 'cegterg:update' )
CALL print_clock( 'cegterg:last' )
CALL print_clock( 'cdiaghg:choldc' )
CALL print_clock( 'cdiaghg:inversion' )
CALL print_clock( 'cdiaghg:paragemm' )
END IF
END IF
ELSE IF ( isolve == 1 ) THEN
WRITE( stdout, '(/5x,"Called by *cgdiagg:")' )
ELSE IF ( isolve == 2 ) THEN
WRITE( stdout, '(/5x,"Called by ppcg_*:")' )
! IF ( iverbosity > 0 ) THEN
CALL print_clock( 'ppcg:zgemm' ) ; CALL print_clock( 'ppcg:dgemm' )
CALL print_clock( 'ppcg:hpsi' )
CALL print_clock( 'ppcg:cholQR' )
CALL print_clock( 'ppcg:RR' )
CALL print_clock( 'ppcg:ZTRSM' ) ; CALL print_clock( 'ppcg:DTRSM' )
CALL print_clock( 'ppcg:lock' )
! END IF
ELSE IF ( isolve == 3 ) THEN
WRITE( stdout, '(/5x,"Called by paro_*:")' )
! IF ( iverbosity > 0 ) THEN
CALL print_clock( 'paro:init' )
CALL print_clock( 'paro:pack' )
CALL print_clock( 'paro:zero' )
CALL print_clock( 'paro:mp_bar' )
CALL print_clock( 'paro:mp_sum' )
CALL print_clock( 'pcg' )
CALL print_clock( 'pcg:hs_1psi' )
CALL print_clock( 'pcg:ortho' )
CALL print_clock( 'pcg:move' )
CALL print_clock( 'rotHSw' )
CALL print_clock( 'rotHSw:move' )
CALL print_clock( 'rotHSw:hc' )
CALL print_clock( 'rotHSw:diag' )
CALL print_clock( 'rotHSw:evc' )
CALL print_clock( 'rotHSw:hc:b0' ) ;
CALL print_clock( 'rotHSw:hc:s1' ) ; call print_clock('rotHSw:hc:comp')
CALL print_clock( 'rotHSw:hc:b1' ) ;
CALL print_clock( 'rotHSw:hc:s2' ) ;
CALL print_clock( 'rotHSw:hc:s3' ) ; call print_clock('rotHSw:hc:rs')
CALL print_clock( 'rotHSw:hc:b2' ) ; call print_clock('rotHSw:hc:sy')
CALL print_clock( 'rotHSw:hc:s4' ) ; CALL print_clock('rotHSw:hc:b3' )
CALL print_clock( 'rotHSw:ev:b0' ) ;
CALL print_clock( 'rotHSw:ev:b3' ) ; call print_clock('rotHSw:ev:bc')
CALL print_clock( 'rotHSw:ev:s5' ) ;
CALL print_clock( 'rotHSw:ev:b4' ) ; call print_clock('rotHSw:ev:comp')
CALL print_clock( 'rotHSw:ev:s6' ) ;
CALL print_clock( 'rotHSw:ev:b5' ) ; call print_clock('rotHSw:ev:sum')
CALL print_clock( 'rotHSw:ev:s7' ) ; CALL print_clock('rotHSw:ev:b6' )
! END IF
END IF
!
CALL print_clock( 'h_psi' )
@ -130,37 +187,6 @@ SUBROUTINE print_clock_pw()
CALL print_clock ( 'fwfft_orbital' )
CALL print_clock ( 'v_loc_psir' )
ENDIF
IF ( gamma_only ) THEN
CALL print_clock( 'rdiaghg' )
IF ( iverbosity > 0 ) THEN
CALL print_clock( 'regterg:overlap' )
CALL print_clock( 'regterg:update' )
CALL print_clock( 'regterg:last' )
CALL print_clock( 'rdiaghg:choldc' )
CALL print_clock( 'rdiaghg:inversion' )
CALL print_clock( 'rdiaghg:paragemm' )
ENDIF
ELSE
CALL print_clock( 'cdiaghg' )
IF ( iverbosity > 0 ) THEN
CALL print_clock( 'cegterg:overlap' )
CALL print_clock( 'cegterg:update' )
CALL print_clock( 'cegterg:last' )
CALL print_clock( 'cdiaghg:choldc' )
CALL print_clock( 'cdiaghg:inversion' )
CALL print_clock( 'cdiaghg:paragemm' )
END IF
END IF
IF ( isolve == 2 ) THEN
! IF ( iverbosity > 0 ) THEN
CALL print_clock( 'ppcg:zgemm' ) ; CALL print_clock( 'ppcg:dgemm' )
CALL print_clock( 'ppcg:hpsi' )
CALL print_clock( 'ppcg:cholQR' )
CALL print_clock( 'ppcg:RR' )
CALL print_clock( 'ppcg:ZTRSM' ) ; CALL print_clock( 'ppcg:DTRSM' )
CALL print_clock( 'ppcg:lock' )
! END IF
END IF
!
WRITE( stdout, '(/5x,"Called by h_psi:")' )
CALL print_clock( 'h_psi:calbec' )

View File

@ -78,7 +78,9 @@ SUBROUTINE sum_band()
!
! ... calculates weights of Kohn-Sham orbitals used in calculation of rho
!
CALL start_clock( 'sum_band:weights' )
CALL weights ( )
CALL stop_clock( 'sum_band:weights' )
!
IF (one_atom_occupations) CALL new_evc()
!
@ -122,6 +124,7 @@ SUBROUTINE sum_band()
!
eband = 0.D0
!
CALL start_clock( 'sum_band:loop' )
IF ( gamma_only ) THEN
!
CALL sum_band_gamma()
@ -131,6 +134,7 @@ SUBROUTINE sum_band()
CALL sum_band_k()
!
END IF
CALL stop_clock( 'sum_band:loop' )
CALL mp_sum( eband, inter_pool_comm )
CALL mp_sum( eband, inter_bgrp_comm )
!
@ -185,6 +189,7 @@ SUBROUTINE sum_band()
!
! ... symmetrize rho(G)
!
CALL start_clock( 'sum_band:sym_rho' )
CALL sym_rho ( nspin_mag, rho%of_g )
!
! ... synchronize rho%of_r to the calculated rho%of_g (use psic as work array)
@ -222,6 +227,7 @@ SUBROUTINE sum_band()
END DO
!
END IF
CALL stop_clock( 'sum_band:sym_rho' )
!
! ... if LSDA rho%of_r and rho%of_g are converted from (up,dw) to
! ... (up+dw,up-dw) format.
@ -286,13 +292,21 @@ SUBROUTINE sum_band()
!
npw = ngk(ik)
!
CALL start_clock( 'sum_band:buffer' )
IF ( nks > 1 ) &
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
IF ( nks > 1 ) CALL using_evc(1) ! get_buffer(evc, ...) evc is updated (intent out)
CALL stop_clock( 'sum_band:buffer' )
!
CALL start_clock( 'sum_band:init_us_2' )
!
IF ( nkb > 0 ) CALL using_vkb(1)
!
IF ( nkb > 0 ) &
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
CALL stop_clock( 'sum_band:init_us_2' )
!
! ... here we compute the band energy: the sum of the eigenvalues
!
@ -548,13 +562,20 @@ SUBROUTINE sum_band()
IF ( lsda ) current_spin = isk(ik)
npw = ngk (ik)
!
CALL start_clock( 'sum_band:buffer' )
IF ( nks > 1 ) &
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
IF ( nks > 1 ) CALL using_evc(1)
CALL stop_clock( 'sum_band:buffer' )
!
CALL start_clock( 'sum_band:init_us_2' )
!
IF ( nkb > 0 ) CALL using_vkb(1)
IF ( nkb > 0 ) &
CALL init_us_2( npw, igk_k(1,ik), xk(1,ik), vkb )
CALL stop_clock( 'sum_band:init_us_2' )
!
! ... here we compute the band energy: the sum of the eigenvalues
!
@ -926,6 +947,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
CALL using_indv_ijkb0(0)
CALL using_becp_auto(2)
!
CALL start_clock( 'sum_band:calbec' )
npw = ngk(ik)
IF ( .NOT. real_space ) THEN
! calbec computes becp = <vkb_i|psi_j>
@ -947,6 +969,7 @@ SUBROUTINE sum_bec ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
call mp_sum(becp%k,inter_bgrp_comm)
endif
ENDIF
CALL stop_clock( 'sum_band:calbec' )
!
! In the EXX case with ultrasoft or PAW, a copy of becp will be
! saved in a global variable to be rotated later

View File

@ -84,7 +84,9 @@ SUBROUTINE sum_band_gpu()
!
! ... calculates weights of Kohn-Sham orbitals used in calculation of rho
!
CALL start_clock_gpu( 'sum_band:weights' )
CALL weights ( )
CALL stop_clock_gpu( 'sum_band:weights' )
!
IF (one_atom_occupations) CALL new_evc()
!
@ -128,6 +130,7 @@ SUBROUTINE sum_band_gpu()
!
eband = 0.D0
!
CALL start_clock_gpu( 'sum_band:loop' )
IF ( gamma_only ) THEN
!
CALL sum_band_gamma_gpu()
@ -137,6 +140,7 @@ SUBROUTINE sum_band_gpu()
CALL sum_band_k_gpu()
!
END IF
CALL stop_clock_gpu( 'sum_band:loop' )
CALL mp_sum( eband, inter_pool_comm )
CALL mp_sum( eband, inter_bgrp_comm )
!
@ -193,6 +197,7 @@ SUBROUTINE sum_band_gpu()
!
! ... symmetrize rho(G)
!
CALL start_clock_gpu( 'sum_band:sym_rho' )
CALL sym_rho ( nspin_mag, rho%of_g )
!
! ... synchronize rho%of_r to the calculated rho%of_g (use psic as work array)
@ -230,6 +235,7 @@ SUBROUTINE sum_band_gpu()
END DO
!
END IF
CALL stop_clock_gpu( 'sum_band:sym_rho' )
!
! ... if LSDA rho%of_r and rho%of_g are converted from (up,dw) to
! ... (up+dw,up-dw) format.
@ -247,8 +253,7 @@ SUBROUTINE sum_band_gpu()
!-----------------------------------------------------------------------
SUBROUTINE sum_band_gamma_gpu()
!-----------------------------------------------------------------------
!
! ... gamma version
!! \(\texttt{sum_band}\) - part for gamma version.
!
USE becmod, ONLY : becp
USE mp_bands, ONLY : me_bgrp
@ -309,13 +314,19 @@ SUBROUTINE sum_band_gpu()
!
npw = ngk(ik)
!
CALL start_clock_gpu( 'sum_band:buffer' )
IF ( nks > 1 ) &
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
IF ( nks > 1 ) CALL using_evc(2) ! get_buffer(evc, ...) evc is updated (intent out)
IF ( nks > 1 ) CALL using_evc_d(0) ! sync on the GPU
!
CALL stop_clock_gpu( 'sum_band:buffer' )
!
CALL start_clock_gpu( 'sum_band:init_us_2' )
IF ( nkb > 0 ) CALL using_vkb_d(2)
IF ( nkb > 0 ) CALL init_us_2_gpu( npw, igk_k_d(1,ik), xk(1,ik), vkb_d )
CALL stop_clock_gpu( 'sum_band:init_us_2' )
!
! ... here we compute the band energy: the sum of the eigenvalues
!
@ -523,8 +534,7 @@ SUBROUTINE sum_band_gpu()
!-----------------------------------------------------------------------
SUBROUTINE sum_band_k_gpu()
!-----------------------------------------------------------------------
!
! ... k-points version
!! \(\texttt{sum_band}\) - part for k-points version.
!
USE wavefunctions_gpum, ONLY : psic_nc_d
USE mp_bands, ONLY : me_bgrp
@ -611,14 +621,18 @@ SUBROUTINE sum_band_gpu()
IF ( lsda ) current_spin = isk(ik)
npw = ngk (ik)
!
CALL start_clock_gpu( 'sum_band:buffer' )
IF ( nks > 1 ) &
CALL get_buffer ( evc, nwordwfc, iunwfc, ik )
IF ( nks > 1 ) CALL using_evc(2)
IF ( nks > 1 ) CALL using_evc_d(0) ! sync evc on GPU, OPTIMIZE (use async here)
CALL stop_clock_gpu( 'sum_band:buffer' )
!
CALL start_clock_gpu( 'sum_band:init_us_2' )
IF ( nkb > 0 ) CALL using_vkb_d(2)
IF ( nkb > 0 ) &
CALL init_us_2_gpu( npw, igk_k_d(1,ik), xk(1,ik), vkb_d )
CALL stop_clock_gpu( 'sum_band:init_us_2' )
!
! ... here we compute the band energy: the sum of the eigenvalues
!
@ -996,13 +1010,15 @@ END SUBROUTINE sum_band_gpu
SUBROUTINE sum_bec_gpu ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd )
!----------------------------------------------------------------------------
!
! This routine computes the sum over bands
! \sum_i <\psi_i|\beta_l>w_i<\beta_m|\psi_i>
! for point "ik" and, for LSDA, spin "current_spin"
! Calls calbec to compute "becp"=<beta_m|psi_i>
! Output is accumulated (unsymmetrized) into "becsum", module "uspp"
!! This routine computes the sum over bands:
!
! Routine used in sum_band (if okvan) and in compute_becsum, called by hinit1 (if okpaw)
!! \[ \sum_i \langle\psi_i|\beta_l\rangle w_i \langle\beta_m|\psi_i\rangle \]
!
!! for point "ik" and, for LSDA, spin "current_spin".
!! Calls calbec to compute \(\text{"becp"}=\langle \beta_m|\psi_i \rangle\).
!! Output is accumulated (unsymmetrized) into "becsum", module "uspp".
!
!! Routine used in sum_band (if okvan) and in compute_becsum, called by hinit1 (if okpaw).
!
#if defined(__CUDA)
USE cudafor
@ -1064,6 +1080,7 @@ SUBROUTINE sum_bec_gpu ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd
CALL using_becsum_d(1)
IF (tqr) CALL using_ebecsum_d(1)
!
CALL start_clock_gpu( 'sum_band:calbec' )
npw = ngk(ik)
IF ( .NOT. real_space ) THEN
CALL using_evc_d(0); CALL using_vkb_d(0); CALL using_becp_d_auto(2)
@ -1087,6 +1104,7 @@ SUBROUTINE sum_bec_gpu ( ik, current_spin, ibnd_start, ibnd_end, this_bgrp_nbnd
call mp_sum(becp%k,inter_bgrp_comm)
endif
ENDIF
CALL stop_clock_gpu( 'sum_band:calbec' )
!
! In the EXX case with ultrasoft or PAW, a copy of becp will be
! saved in a global variable to be rotated later
@ -1277,10 +1295,9 @@ END SUBROUTINE sum_bec_gpu
!----------------------------------------------------------------------------
SUBROUTINE add_becsum_nc_gpu ( na, np, becsum_nc_d, becsum_d )
!----------------------------------------------------------------------------
!
! This routine multiplies becsum_nc by the identity and the Pauli matrices,
! saves it in becsum for the calculation of augmentation charge and
! magnetization.
!! This routine multiplies \(\text{becsum_nc}\) by the identity and the
!! Pauli matrices, saves it in \(\text{becsum}\) for the calculation of
!! augmentation charge and magnetization.
!
#if defined(__CUDA)
USE cudafor
@ -1340,10 +1357,9 @@ END SUBROUTINE add_becsum_nc_gpu
!----------------------------------------------------------------------------
SUBROUTINE add_becsum_so_gpu( na, np, becsum_nc_d, becsum_d )
!----------------------------------------------------------------------------
!
! This routine multiplies becsum_nc by the identity and the Pauli matrices,
! rotates it as appropriate for the spin-orbit case, saves it in becsum
! for the calculation of augmentation charge and magnetization.
!! This routine multiplies \(\text{becsum_nc}\) by the identity and the Pauli
!! matrices, rotates it as appropriate for the spin-orbit case, saves it in
!! \(\text{becsum}\) for the calculation of augmentation charge and magnetization.
!
#if defined(__CUDA)
USE cudafor

View File

@ -92,9 +92,10 @@ SUBROUTINE init_clocks( go )
! ... go = .FALSE. : only clock #1 will run
!
USE util_param, ONLY : DP, stdout
USE mytime, ONLY : called, gpu_called, t0cpu, cputime, no, notrunning, maxclock, &
clock_label, walltime, t0wall, gputime, nclock, mpi_per_thread
USE mytime, ONLY : gpu_starts, gpu_stops
USE mytime, ONLY : called, t0cpu, cputime, no, notrunning, maxclock, &
clock_label, walltime, t0wall, nclock, mpi_per_thread
! ... GPU related timers
USE mytime, ONLY : gpu_starts, gpu_stops, gpu_called, gputime
#if defined (__TRACE)
USE mytime, ONLY : mpime, max_print_depth, MPI_COMM_WORLD
#endif
@ -373,7 +374,7 @@ SUBROUTINE stop_clock( label )
RETURN
!
END SUBROUTINE stop_clock
!
SUBROUTINE stop_clock_gpu( label )
!----------------------------------------------------------------------------
!
@ -405,6 +406,10 @@ SUBROUTINE stop_clock_gpu( label )
!
IF ( no ) RETURN
!
! ... initialize time used in CUDA APIs if __CUDA is present.
!
time = 0.0
!
! ... prevent trouble if label is longer than 12 characters
!
label_ = trim ( label )
@ -457,7 +462,7 @@ SUBROUTINE print_clock( label )
!----------------------------------------------------------------------------
!
USE util_param, ONLY : stdout
USE mytime, ONLY : nclock, clock_label
USE mytime, ONLY : nclock, clock_label, gpu_called
!
IMPLICIT NONE
!
@ -465,6 +470,9 @@ SUBROUTINE print_clock( label )
!
CHARACTER(len=12) :: label_
INTEGER :: n
LOGICAL :: print_gpu
!
print_gpu = ANY(gpu_called > 0)
!
IF ( label == ' ' ) THEN
!
@ -473,7 +481,7 @@ SUBROUTINE print_clock( label )
DO n = 1, nclock
!
CALL print_this_clock( n )
CALL print_this_clock_gpu( n )
IF(print_gpu) CALL print_this_clock_gpu( n )
!
ENDDO
!
@ -488,7 +496,7 @@ SUBROUTINE print_clock( label )
IF ( clock_label(n) == label_ ) THEN
!
CALL print_this_clock( n )
CALL print_this_clock_gpu( n )
IF(print_gpu) CALL print_this_clock_gpu( n )
!
exit
!

View File

@ -56,7 +56,7 @@ inputs_args = ('c.scf.in', '1'), ('c.phG.in', '2'), ('ni.scf.in', '1'), ('ni.phX
[ph_metal/]
program = PH
inputs_args = ('al.scf.fit.in', '1'), ('al.scf.in', '1'), ('al.elph.in', '2'), ('q2r.in', '3'), ('matdyn.in.freq', '4'), ('matdyn.in.dos', '4'), ('lambda.in', '5')
inputs_args = ('al.scf.fit.in', '1'), ('al.scf.in', '1'), ('al.elph.in', '2'), ('al.elph.notrans.in', '2'), ('q2r.in', '3'), ('matdyn.in.freq', '4'), ('matdyn.in.dos', '4'), ('lambda.in', '5')
[ph_U_metal_us/]
program = PH

View File

@ -13,7 +13,7 @@ SUBROUTINE elphon()
! Electron-phonon calculation from data saved in fildvscf
!
USE kinds, ONLY : DP
USE constants, ONLY : amu_ry
USE constants, ONLY : amu_ry, RY_TO_THZ, RY_TO_CMM1
USE cell_base, ONLY : celldm, omega, ibrav, at, bg
USE ions_base, ONLY : nat, ntyp => nsp, ityp, tau, amass
USE gvecs, ONLY: doublegrid
@ -51,7 +51,7 @@ SUBROUTINE elphon()
COMPLEX(DP), allocatable :: phip (:, :, :, :)
INTEGER :: ntyp_, nat_, ibrav_, nspin_mag_, mu, nu, na, nb, nta, ntb, nqs_
REAL(DP) :: celldm_(6)
REAL(DP) :: celldm_(6), w1
CHARACTER(LEN=3) :: atm(ntyp)
CALL start_clock ('elphon')
@ -166,9 +166,27 @@ SUBROUTINE elphon()
deallocate( phip )
ENDIF
ENDIF
!
! Write the phonon frequencies to stdout, in THz and cm-1
! (modes with w2 < 0 are reported with a negative frequency)
!
WRITE( stdout, 8000) (xq (i), i = 1, 3)
!
DO nu = 1, 3 * nat
w1 = SQRT( ABS( w2(nu) ) )
if (w2(nu) < 0.d0) w1 = - w1
WRITE( stdout, 8010) nu, w1 * RY_TO_THZ, w1 * RY_TO_CMM1
ENDDO
!
WRITE( stdout, '(1x,74("*"))')
!
ENDIF ! .NOT. trans
!
CALL stop_clock ('elphon')
!
8000 FORMAT(/,5x,'Diagonalizing the dynamical matrix', &
& //,5x,'q = ( ',3f14.9,' ) ',//,1x,74('*'))
8010 FORMAT (5x,'freq (',i5,') =',f15.6,' [THz] =',f15.6,' [cm-1]')
!
RETURN
END SUBROUTINE elphon
!

View File

@ -25,6 +25,7 @@
Hubbard_U(1) = 2.0
/
&electrons
startingwfc = 'atomic'
conv_thr = 1.d-14
mixing_beta = 0.3
/

View File

@ -0,0 +1,15 @@
Electron-phonon coefficients for Al
&inputph
! Electron-phonon test input run with trans=.false.; it is listed in the
! ph_metal jobconfig right after al.elph.in, with the same argument '2'.
! NOTE(review): presumably relies on the fildvscf / fildyn files left by the
! preceding al.elph.in run rather than recomputing them -- confirm.
tr2_ph=1.0d-10,
prefix='aluminum',
fildvscf='aldv',
amass(1)=26.98,
outdir='./',
fildyn='al.dyn',
electron_phonon='interpolated',
el_ph_sigma=0.005,
el_ph_nsigma=10,
! trans=.false. exercises the ".NOT. trans" branch of elphon(), which prints
! the phonon frequencies to stdout.
trans=.false.,
ldisp=.true.
nq1=4, nq2=4, nq3=4
/

File diff suppressed because it is too large Load Diff