Merge branch 'EPWv5.6' into 'impurity_scattering_transport'

# Conflicts:
#   EPW/src/bcast_epw_input.f90
#   EPW/src/epw_readin.f90
This commit is contained in:
Joshua Leveillee 2022-10-27 19:51:33 +00:00
commit e2958c8b3e
1442 changed files with 237471 additions and 172089 deletions

View File

@ -72,19 +72,19 @@ pgi serial k80:
- make run-tests-cp-serial
pgi serial v100 :
tags: [galileo,v100]
script:
- module purge
- module load profile/global hpc-sdk/20.9--binary mkl/2019--binary python
- ./configure --disable-parallel --enable-openmp --with-cuda-runtime=10.1 --with-cuda-cc=70 --with-cuda=yes --enable-cuda-env-check=no
- make -j pw cp
- if [ -z ${SLURM_CPUS_PER_TASK} ]; then export OMP_NUM_THREADS=1; else export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK; fi
- echo "Using $OMP_NUM_THREADS threads"
- cd test-suite
- make clean
- make run-tests-pw-serial
- make run-tests-cp-serial
#pgi serial v100 :
# tags: [galileo,v100]
# script:
# - module purge
# - module load profile/global hpc-sdk/20.9--binary mkl/2019--binary python
# - ./configure --disable-parallel --enable-openmp --with-cuda-runtime=10.1 --with-cuda-cc=70 --with-cuda=yes --enable-cuda-env-check=no
# - make -j pw cp
# - if [ -z ${SLURM_CPUS_PER_TASK} ]; then export OMP_NUM_THREADS=1; else #export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK; fi
# - echo "Using $OMP_NUM_THREADS threads"
# - cd test-suite
# - make clean
# - make run-tests-pw-serial
# - make run-tests-cp-serial
pgi207 power v100:
tags: [marconi100]

View File

@ -1,7 +1,9 @@
build:pw:
tags: [docker]
image: espressofoundation/ubuntu:latest
image: ubuntu:18.04
script:
- apt-get -qq update
- apt-get -qq install m4 make wget curl git gfortran gcc libopenblas-dev libfftw3-dev libopenmpi-dev
- ./configure
- make pw
# - export OMP_NUM_THREADS=1
@ -12,8 +14,10 @@ build:pw:
build:cp:
tags: [docker]
image: espressofoundation/ubuntu:latest
image: ubuntu:18.04
script:
- apt-get -qq update
- apt-get -qq install m4 make wget curl git gfortran gcc libopenblas-dev libfftw3-dev libopenmpi-dev
- ./configure
- make cp
# - export OMP_NUM_THREADS=1
@ -52,11 +56,14 @@ build:cmake-gnu:
-DMPIEXEC_PREFLAGS="--allow-run-as-root;--oversubscribe" .. && make
&& make pw ph hp pwcond neb pp pwall cp tddfpt gwl ld1 upf xspectra couple epw all_currents
&& ctest -L unit --output-on-failure
- make install DESTDIR=`pwd`/install_root
build:pgi:
tags: [docker]
image: nvcr.io/nvidia/nvhpc:20.9-devel-centos7
image: nvcr.io/nvidia/nvhpc:20.9-devel-ubuntu20.04
script:
- apt-get -qq update
- apt-get -qq install ca-certificates git m4
- ./configure FC=pgf90 F90=pgf90 F77=pgfortran MPIF90=mpif90 --enable-openmp --with-cuda=yes --enable-cuda-env-check=no
- make pw cp ph

9
.gitmodules vendored
View File

@ -10,9 +10,12 @@
[submodule "external/mbd"]
path = external/mbd
url = https://github.com/libmbd/libmbd.git
[submodule "external/eigensolver_gpu"]
path = external/eigensolver_gpu
url = https://github.com/NVIDIA/Eigensolver_gpu.git
[submodule "external/devxlib"]
path = external/devxlib
url = https://gitlab.com/max-centre/components/devicexlib.git
[submodule "external/d3q"]
path = external/d3q
url = https://github.com/anharmonic/d3q.git
[submodule "external/pw2qmcpack"]
path = external/pw2qmcpack
url = https://github.com/QMCPACK/pw2qmcpack.git

View File

@ -13,7 +13,7 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
project(qe
VERSION 6.8
VERSION 7.1
DESCRIPTION "ESPRESSO: opEn-Source Package for Research in Electronic Structure, Simulation, and Optimization"
LANGUAGES Fortran C)
@ -68,8 +68,6 @@ option(QE_ENABLE_CUDA
"enable CUDA acceleration on NVIDIA GPUs" OFF)
if(QE_ENABLE_CUDA)
option(QE_ENABLE_OPENACC "enable OpenACC acceleration" ON)
option(QE_ENABLE_LAXLIB_CUSOLVER
"enable CUDA solver acceleration for LAXLib on NVIDIA GPUs" ON)
# OpenMP enabled by default if CUDA is enable
option(QE_ENABLE_OPENMP
"enable distributed execution support via OpenMP" ON)
@ -100,6 +98,9 @@ option(QE_LAPACK_INTERNAL
"enable internal reference LAPACK" OFF)
option(QE_ENABLE_SCALAPACK
"enable SCALAPACK execution units" OFF)
cmake_dependent_option(QE_ENABLE_SCALAPACK_QRCP
"enable SCALAPACK QRCP in pw2wannier90 (requires SCALAPACK>=2.1.0 or Intel MKL>=2020)"
OFF "QE_ENABLE_SCALAPACK" OFF)
option(QE_ENABLE_ELPA
"enable ELPA execution units" OFF)
option(QE_ENABLE_LIBXC
@ -113,6 +114,39 @@ option(QE_ENABLE_DOC
set(QE_FFTW_VENDOR "AUTO" CACHE
STRING "select a specific FFTW library [Intel_DFTI, Intel_FFTW3, ArmPL, IBMESSL, FFTW3, Internal]")
set(QE_ENABLE_SANITIZER "none" CACHE STRING "none,asan,ubsan,tsan,msan")
set(QE_ENABLE_PLUGINS "" CACHE STRING "Semicolon-separated list of plugins")
option(QE_ENABLE_FOX
"enable XML I/O via Fox library" OFF)
if(QE_ENABLE_FOX)
if(FOX_ROOT)
set(QE_FOX_INTERNAL OFF)
endif()
option(QE_FOX_INTERNAL
"enable FoX intenal library" ON)
endif()
if(WANNIER90_ROOT)
set(QE_WANNIER90_INTERNAL OFF)
endif()
option(QE_WANNIER90_INTERNAL
"enable Wannier90 intenal library" ON)
if(MBD_ROOT)
set(QE_MBD_INTERNAL OFF)
endif()
option(QE_MBD_INTERNAL
"enable LibMBD intenal library" ON)
if(DEVICEXLIB_ROOT)
set(QE_DEVICEXLIB_INTERNAL OFF)
endif()
option(QE_DEVICEXLIB_INTERNAL
"enable DeviceXlib intenal library" ON)
if(ENVIRON_ROOT)
set(ENVIRON_DEFAULT "EXTERNAL")
else()
set(ENVIRON_DEFAULT "NO")
endif()
set(QE_ENABLE_ENVIRON "${ENVIRON_DEFAULT}" CACHE
STRING "select a specific Environ library [NO, EXTERNAL, INTERNAL]")
# TODO change all ifdefs throughout code base to match
# cmake options
@ -156,6 +190,13 @@ endif()
if(QE_ENABLE_HDF5)
qe_add_global_compile_definitions(__HDF5)
endif()
if(QE_ENABLE_ENVIRON)
qe_add_global_compile_definitions(__ENVIRON)
endif()
if(QE_CLOCK_SECONDS)
qe_add_global_compile_definitions(__CLOCK_SECONDS)
endif()
# Feature checks
check_function_exists(mallinfo HAVE_MALLINFO)
@ -182,9 +223,6 @@ endif()
if(QE_ENABLE_MPI_GPU_AWARE AND NOT (QE_ENABLE_CUDA AND QE_ENABLE_MPI))
message(FATAL_ERROR "GPU aware MPI requires both MPI and CUDA features enabled")
endif()
if(QE_ENABLE_LAXLIB_CUSOLVER AND (NOT QE_ENABLE_CUDA))
message(FATAL_ERROR "CUDA Solver for LAXLib requires CUDA support, enable it with '-DQE_ENABLE_CUDA=ON' or disable CUDA Solver for LAXLib with '-DQE_ENABLE_LAXLIB_CUSOLVER=OFF'")
endif()
# if(QE_ENABLE_HDF5 AND NOT QE_ENABLE_MPI)
# message(FATAL_ERROR "HDF5 requires MPI support, enable it with '-DQE_ENABLE_MPI=ON' or disable HDF5 with '-DQE_ENABLE_HDF5=OFF'")
# endif()
@ -202,6 +240,16 @@ if(NOT QE_ENABLE_SANITIZER STREQUAL "none" AND NOT CMAKE_Fortran_COMPILER_ID MAT
message(FATAL_ERROR "-DQE_ENABLE_SANITIZER=${QE_ENABLE_SANITIZER} only works with the GNU compiler")
endif()
# valid plugins checks
set(VALID_QE_PLUGINS "d3q" "pw2qmcpack")
# Perform sanitizer option check, only works in debug mode
foreach(PLUGIN IN LISTS QE_ENABLE_PLUGINS)
if(NOT PLUGIN IN_LIST VALID_QE_PLUGINS)
message(FATAL_ERROR "Invalid QE plugin ${PLUGIN}, value must be one of \"${VALID_QE_PLUGINS}\".")
else()
message(STATUS "Enable QE plugin ${PLUGIN}")
endif()
endforeach()
############################################################
# C preprocessor
@ -315,8 +363,8 @@ if(QE_ENABLE_OPENACC)
target_link_libraries(qe_openacc_fortran INTERFACE OpenACC::OpenACC_Fortran)
target_link_libraries(qe_openacc_c INTERFACE OpenACC::OpenACC_C)
if(GPU_TARGET_COMPILE_OPTIONS)
target_compile_options(qe_openacc_fortran INTERFACE "${GPU_TARGET_COMPILE_OPTIONS}")
target_compile_options(qe_openacc_c INTERFACE "${GPU_TARGET_COMPILE_OPTIONS}")
target_compile_options(qe_openacc_fortran INTERFACE "$<$<COMPILE_LANGUAGE:Fortran>:${GPU_TARGET_COMPILE_OPTIONS}>")
target_compile_options(qe_openacc_c INTERFACE "$<$<COMPILE_LANGUAGE:C>:${GPU_TARGET_COMPILE_OPTIONS}>")
endif()
endif(QE_ENABLE_OPENACC)
@ -429,10 +477,10 @@ else()
endif()
message(STATUS "Installing LAPACK via submodule")
qe_git_submodule_update(external/lapack)
add_subdirectory(external/lapack EXCLUDE_FROM_ALL)
add_subdirectory(external/lapack)
target_link_libraries(qe_lapack INTERFACE lapack)
qe_fix_fortran_modules(lapack)
qe_install_targets(lapack)
# make lapack ready for other external libraries like mbd
set(LAPACK_LIBRARIES lapack)
endif()
###########################################################
@ -448,6 +496,18 @@ if(QE_ENABLE_SCALAPACK)
INTERFACE
${SCALAPACK_LIBRARIES}
${SCALAPACK_LINKER_FLAGS})
if(QE_ENABLE_SCALAPACK_QRCP)
include(CheckFortranFunctionExists)
set(CMAKE_REQUIRED_LIBRARIES "${SCALAPACK_LIBRARIES}")
check_fortran_function_exists("pzgeqpf" SCALAPACK_PZGEQPF_WORKS)
unset(CMAKE_REQUIRED_LIBRARIES)
if(SCALAPACK_PZGEQPF_WORKS)
message(STATUS "Found pzgeqpf, add ScaLAPACK pzgeqpf macro")
qe_add_global_compile_definitions(__SCALAPACK_QRCP)
else()
message(FATAL_ERROR "QE_ENABLE_SCALAPACK_QRCP requested but the current ScaLAPACK installation doesn't contain pzgeqpf!")
endif()
endif()
endif(QE_ENABLE_SCALAPACK)
###########################################################
@ -544,8 +604,7 @@ if(QE_ENABLE_HDF5)
target_link_libraries(qe_hdf5_fortran
INTERFACE
${HDF5_Fortran_LIBRARIES}
${HDF5_Fortran_HL_LIBRARIES})
${HDF5_Fortran_LIBRARIES})
target_include_directories(qe_hdf5_fortran
INTERFACE
${HDF5_Fortran_INCLUDE_DIRS})
@ -555,8 +614,7 @@ if(QE_ENABLE_HDF5)
target_link_libraries(qe_hdf5_c
INTERFACE
${HDF5_C_LIBRARIES}
${HDF5_C_HL_LIBRARIES})
${HDF5_C_LIBRARIES})
target_include_directories(qe_hdf5_c
INTERFACE
${HDF5_C_INCLUDE_DIRS})
@ -616,6 +674,8 @@ add_subdirectory(PWCOND)
add_subdirectory(TDDFPT)
add_subdirectory(XSpectra)
add_subdirectory(QEHeat)
add_subdirectory(KCW)
add_subdirectory(GUI)
if(QE_ENABLE_DOC)
add_subdirectory(Doc)
endif()
@ -672,88 +732,11 @@ add_custom_target(depgraph
###########################################################
# Custom make targets
# The collection 'pwall' is defined here
# Each of the followings is defined inside its subdirectory
# pw ph pp hp pwcond neb cp tddfpt gwl ld1 upf epw
# xspectra couple all_currents
###########################################################
add_custom_target(pw
DEPENDS
qe_pw_exe
qe_pw_tools_ibrav2cell_exe
qe_pw_tools_cell2ibrav_exe
qe_pw_tools_ev_exe
qe_pw_tools_kpoints_exe
qe_pw_tools_pwi2xsf_exe
COMMENT
"basic code for scf, structure optimization, MD")
add_custom_target(ph
DEPENDS
qe_phonon_ph_exe
qe_phonon_dynmat_exe
qe_phonon_q2r_exe
qe_phonon_dvscf_q2r_exe
qe_phonon_matdyn_exe
qe_phonon_q2qstar_exe
qe_phonon_lambda_exe
qe_phonon_alpha2f_exe
qe_phonon_epa_exe
qe_phonon_fqha_exe
qe_phonon_fd_exe
qe_phonon_fdef_exe
qe_phonon_fdifc_exe
qe_phonon_postahc_exe
COMMENT
"phonon code, Gamma-only and third-order derivatives")
add_custom_target(hp
DEPENDS
qe_hp_exe
COMMENT
"calculation of the Hubbard parameters from DFPT")
add_custom_target(pwcond
DEPENDS
qe_pwcond_exe
COMMENT
"ballistic conductance")
add_custom_target(neb
DEPENDS
qe_neb_exe
qe_neb_pathinterpolation_exe
COMMENT
"code for Nudged Elastic Band method")
add_custom_target(pp
DEPENDS
qe_pp_exe
qe_pp_opengrid_exe
qe_pp_average_exe
qe_pp_bands_exe
qe_pp_dos_exe
qe_pp_pawplot_exe
qe_pp_planavg_exe
qe_pp_plotband_exe
qe_pp_plotproj_exe
qe_pp_plotrho_exe
qe_pp_pmw_exe
qe_pp_projwfc_exe
qe_pp_pw2wannier90_exe
qe_pp_pw2critic_exe
qe_pp_wfck2r_exe
qe_pp_initial_state_exe
qe_pp_pw2gw_exe
qe_pp_sumpdos_exe
qe_pp_epsilon_exe
qe_pp_wannierham_exe
qe_pp_wannierplot_exe
qe_pp_molecularpdos_exe
qe_pp_pw2bgw_exe
qe_pp_fermivelocity_exe
qe_pp_fermisurface_exe
qe_pp_fermiproj_exe
qe_pp_ppacf_exe
COMMENT
"postprocessing programs")
add_custom_target(pwall
DEPENDS
pw
@ -763,77 +746,3 @@ add_custom_target(pwall
neb
COMMENT
"same as \"make pw ph pp pwcond neb\"")
add_custom_target(cp
DEPENDS
qe_cpv_exe
qe_cpv_manycp_exe
qe_cpv_cppp_exe
qe_cpv_wfdd_exe
COMMENT
"CP code: Car-Parrinello molecular dynamics")
add_custom_target(tddfpt
DEPENDS
qe_tddfpt_turbolanczos_exe
qe_tddfpt_turbodavidson_exe
qe_tddfpt_turboeels_exe
qe_tddfpt_turbospectrum_exe
qe_tddfpt_turbomagnons_exe
COMMENT
"time dependent dft code")
add_custom_target(gwl
DEPENDS
qe_gww_util_grap_exe
qe_gww_util_abcoefftoeps_exe
qe_gww_util_memorypw4gww_exe
qe_gww_bse_bse_main_exe
qe_gww_gww_exe
qe_gww_gww_fit_exe
qe_gww_head_exe
qe_gww_simple_bse_exe
qe_gww_simple_ip_exe
COMMENT
"GW with Lanczos chains")
add_custom_target(ld1
DEPENDS
qe_atomic_exe
COMMENT
"utilities for pseudopotential generation")
add_custom_target(upf
DEPENDS
#Library
qe_upflib
#Executables
qe_upflib_virtual_v2_exe
qe_upflib_upfconv_exe
COMMENT
"utilities for pseudopotential conversion")
add_custom_target(xspectra
DEPENDS
qe_xspectra_exe
qe_xspectra_spectracorrection_exe
qe_xspectra_molecularnexafs_exe
COMMENT
"X-ray core-hole spectroscopy calculations")
add_custom_target(couple
DEPENDS
qe_couple
COMMENT
"library interface for coupling to external codes")
add_custom_target(epw
DEPENDS
qe_epw_exe
COMMENT
"electron-Phonon Coupling with wannier functions")
add_custom_target(all_currents
DEPENDS
qe_qeheat_exe
COMMENT
"QEHeat code to compute energy and electronic density currents")

View File

@ -14,4 +14,10 @@ target_link_libraries(qe_couple
###########################################################
qe_install_targets(qe_couple)
qe_install_targets(qe_couple)
add_custom_target(couple
DEPENDS
qe_couple
COMMENT
"library interface for coupling to external codes")

View File

@ -21,6 +21,7 @@ set(src_cpv
src/electrons_nose.f90
src/energies.f90
src/ensemble_dft.f90
src/environ_cp_module.f90
src/exch_corr.f90
src/exx_cg.f90
src/exx_es.f90
@ -191,3 +192,12 @@ qe_install_targets(
qe_cpv_manycp_exe
qe_cpv_cppp_exe
qe_cpv_wfdd_exe)
add_custom_target(cp
DEPENDS
qe_cpv_exe
qe_cpv_manycp_exe
qe_cpv_cppp_exe
qe_cpv_wfdd_exe
COMMENT
"CP code: Car-Parrinello molecular dynamics")

View File

@ -9,11 +9,16 @@ input_description -distribution {Quantum Espresso} -package CP -program cp.x {
HARTREE ATOMIC UNITS. Charge is "number" charge (i.e. not multiplied
by e); potentials are in energy units (i.e. they are multiplied by e)
BEWARE: TABS, DOS <CR><LF> CHARACTERS ARE POTENTIAL SOURCES OF TROUBLE
@b BEWARE: TABS, CRLF, ANY OTHER STRANGE CHARACTER, ARE A SOURCES OF TROUBLE
@b USE ONLY PLAIN ASCII TEXT FILES (CHECK THE FILE TYPE WITH UNIX COMMAND "file")
Namelists must appear in the order given below.
Comment lines in namelists can be introduced by a "!", exactly as in
fortran code. Comments lines in ``cards'' can be introduced by
either a "!" or a "#" character in the first position of a line.
Do not start any line in ``cards'' with a "/" character.
Leave a space between card names and card options, e.g.
ATOMIC_POSITIONS (bohr), not ATOMIC_POSITIONS(bohr)
Structure of the input data:
===============================================================================
@ -157,8 +162,7 @@ input_description -distribution {Quantum Espresso} -package CP -program cp.x {
default { .false. }
info {
Write stress tensor to standard output each "iprint" steps.
It is set to .TRUE. automatically if
calculation='vc-relax'
It is set to .TRUE. automatically if calculation='vc-relax'
}
}
@ -683,7 +687,8 @@ input_description -distribution {Quantum Espresso} -package CP -program cp.x {
default { it depends on the specified functional }
info {
Fraction of EXX for hybrid functional calculations. In the case of
input_dft='PBE0', the default value is 0.25.
input_dft='PBE0', the default value is 0.25. This entry overrides
the default (as well as the restart file) value of a given functional.
}
}
@ -1377,7 +1382,9 @@ input_description -distribution {Quantum Espresso} -package CP -program cp.x {
var cell_dynamics -type CHARACTER {
default { 'none' }
default { 'pr' if @ref calculation = 'vc-md', 'vc'cp', 'vc-cp-wf';
'damp-pr' if @ref calculation = 'vc-relax';
'none' otherwise }
info {
set how cell should be moved
'none' : cell is kept fixed

View File

@ -46,7 +46,7 @@
<tr><th style="margin: 3 3 3 10; background: #005789; background: linear-gradient(rgba(0,87,137,1),rgba(0,119,189,1)); color: #ffffee; ">
<h1 style="margin: 10 10 10 15; text-align: left;"> Input File Description </h1>
<h2 style="margin: 10 10 10 15; text-align: left;"> Program:
cp.x / CP / Quantum Espresso<span style="font-weight: normal;"> (version: 6.8)</span>
cp.x / CP / Quantum Espresso<span style="font-weight: normal;"> (version: 7.0)</span>
</h2>
</th></tr>
<tr><td style="padding: 10 3 3 3; background: #ffffff; color: #222222; ">
@ -1138,7 +1138,8 @@ Also see related keywords starting with exx_.
</tr>
<tr><td align="left" valign="top" colspan="2"><blockquote><pre style="margin-bottom: -1em;">
Fraction of EXX for hybrid functional calculations. In the case of
input_dft='PBE0', the default value is 0.25.
input_dft='PBE0', the default value is 0.25. This entry overrides
the default (as well as the restart file) value of a given functional.
</pre></blockquote></td></tr>
</table>
<div align="right" style="margin-bottom: 5;">[<a href="#__top__">Back to Top</a>]</div>
@ -3988,7 +3989,7 @@ separate the corresponding change in parameters with a column.
</td></tr>
</table>
<small>
This file has been created by helpdoc utility on Wed Nov 17 17:34:12 CET 2021.
This file has been created by helpdoc utility on Sat Dec 18 20:08:38 CET 2021.
</small>
</body>
</html>

View File

@ -3,7 +3,7 @@
------------------------------------------------------------------------
INPUT FILE DESCRIPTION
Program: cp.x / CP / Quantum Espresso (version: 6.8)
Program: cp.x / CP / Quantum Espresso (version: 7.0)
------------------------------------------------------------------------
@ -710,7 +710,8 @@ NAMELIST: &SYSTEM
Type: REAL
Default: it depends on the specified functional
Description: Fraction of EXX for hybrid functional calculations. In the case of
input_dft='PBE0', the default value is 0.25.
input_dft='PBE0', the default value is 0.25. This entry overrides
the default (as well as the restart file) value of a given functional.
+--------------------------------------------------------------------
+--------------------------------------------------------------------
@ -2517,4 +2518,4 @@ CARD: PLOT_WANNIER
### END OF SUPERCARD : AUTOPILOT/ENDRULES #############################
This file has been created by helpdoc utility on Wed Nov 17 17:34:12 CET 2021
This file has been created by helpdoc utility on Sat Dec 18 20:08:38 CET 2021

View File

@ -123,6 +123,7 @@ namelist follow:
default { 1 }
info {
Number of replicas of atomic positions along cell parameters.
CURRENTLY DISABLED
If np1, np2, np3 are 1 or not specified, cppp.x does not
replicate atomic positions in space.

View File

@ -46,7 +46,7 @@
<tr><th style="margin: 3 3 3 10; background: #005789; background: linear-gradient(rgba(0,87,137,1),rgba(0,119,189,1)); color: #ffffee; ">
<h1 style="margin: 10 10 10 15; text-align: left;"> Input File Description </h1>
<h2 style="margin: 10 10 10 15; text-align: left;"> Program:
cppp.x / CP / Quantum Espresso<span style="font-weight: normal;"> (version: 6.8)</span>
cppp.x / CP / Quantum Espresso<span style="font-weight: normal;"> (version: 7.0)</span>
</h2>
</th></tr>
<tr><td style="padding: 10 3 3 3; background: #ffffff; color: #222222; ">
@ -265,6 +265,7 @@ This logical flag control the rotation of the cell
</tr>
<tr><td align="left" valign="top" colspan="2"><blockquote><pre style="margin-bottom: -1em;">
Number of replicas of atomic positions along cell parameters.
CURRENTLY DISABLED
If np1, np2, np3 are 1 or not specified, cppp.x does not
replicate atomic positions in space.
@ -331,7 +332,7 @@ atomic_number(2) specify the atomic number of the second specie
</td></tr>
</table>
<small>
This file has been created by helpdoc utility on Wed Nov 17 17:34:12 CET 2021.
This file has been created by helpdoc utility on Sat Dec 18 20:08:38 CET 2021.
</small>
</body>
</html>

View File

@ -3,7 +3,7 @@
------------------------------------------------------------------------
INPUT FILE DESCRIPTION
Program: cppp.x / CP / Quantum Espresso (version: 6.8)
Program: cppp.x / CP / Quantum Espresso (version: 7.0)
------------------------------------------------------------------------
@ -137,6 +137,7 @@ NAMELIST: &INPUTPP
Type: INTEGER
Default: 1
Description: Number of replicas of atomic positions along cell parameters.
CURRENTLY DISABLED
If np1, np2, np3 are 1 or not specified, cppp.x does not
replicate atomic positions in space.
@ -179,4 +180,4 @@ NAMELIST: &INPUTPP
===END OF NAMELIST======================================================
This file has been created by helpdoc utility on Wed Nov 17 17:34:12 CET 2021
This file has been created by helpdoc utility on Sat Dec 18 20:08:38 CET 2021

View File

@ -3,7 +3,7 @@
Introduction
============
This guide covers the usage of the `CP` package, version 6.8, a core
This guide covers the usage of the `CP` package, version 7.0, a core
component of the Quantum ESPRESSO distribution. Further documentation,
beyond what is provided in this guide, can be found in the directory
`CPV/Doc/`, containing a copy of this guide.
@ -28,6 +28,16 @@ merged with `CP`, was developed by Carlo Cavazzoni (Leonardo), Gerardo
Ballabio (CINECA), Sandro Scandolo (ICTP), Guido Chiarotti, Paolo Focher,
and others. We quote in particular:
- Sergio Orlandini (CINECA) for completing the CUDA Fortran acceleration
started by Carlo Cavazzoni
- Fabio Affinito and Maruella Ippolito (CINECA) for testing and benchmarking
- Ivan Carnimeo and Pietro Delugas (SISSA) for further openACC acceleration
- Riccardo Bertossa (SISSA) for extensive refactoring of ensemble dynamics /
conjugate gradient part
- Federico Grasselli and Riccardo Bertossa (SISSA) for bug fixes,
extensions to Autopilot;

View File

@ -28,6 +28,7 @@ electrons.o \
electrons_nose.o \
energies.o \
ensemble_dft.o \
environ_cp_module.o \
exch_corr.o \
exx_cg.o \
exx_es.o \
@ -115,7 +116,7 @@ entropy.o
QEMODS=$(BASEMODS)
TLDEPS= bindir libs mods
TLDEPS= bindir mods
all : tldeps libcp.a manycp.x cp.x wfdd.x cppp.x

View File

@ -1540,7 +1540,7 @@ contains
else
if (do_k) then
if (pre_state) then
!$acc data copyin(g2kin,ave_ene,tpiba2) copy(c0)
!$acc data present(g2kin) copyin(ave_ene,tpiba2) copy(c0)
!$acc parallel loop collapse(2) private(x)
do i = 1, n
do ig = 1, ngw
@ -1595,7 +1595,7 @@ contains
real(kind=dp) :: tmp
!
!$acc parallel loop private(tmp) copyin(c,g2kin,gstart,ngw) copyout(ene_ave)
!$acc parallel loop private(tmp) present(g2kin) copyin(c,gstart,ngw) copyout(ene_ave)
DO i = 1, n
tmp = 0.d0
!$acc loop vector reduction(+:tmp)

View File

@ -28,6 +28,17 @@ MODULE cp_restart_new
USE io_global, ONLY : ionode, ionode_id, stdout
USE mp, ONLY : mp_bcast
USE matrix_inversion
#if defined (__fox)
USE FoX_wxml
USE FoX_dom, ONLY : Node, parseFile, item, getElementsByTagname, &
hasAttribute, extractDataAttribute, &
extractDataContent, destroy
#else
USE wxml
USE dom, ONLY : Node, parseFile, item, getElementsByTagname, &
hasAttribute, extractDataAttribute, &
extractDataContent, destroy
#endif
!
IMPLICIT NONE
!
@ -70,7 +81,7 @@ MODULE cp_restart_new
USE funct, ONLY : get_dft_name, dft_is_nonlocc, get_nonlocc_name
USE xc_lib, ONLY : xclib_dft_is, xclib_get_exx_fraction, &
get_screening_parameter
USE ldaU_cp, ONLY : lda_plus_U, ns, Hubbard_l, &
USE ldaU_cp, ONLY : lda_plus_U, ns, Hubbard_l, Hubbard_n, &
Hubbard_lmax, Hubbard_U
USE energies, ONLY : enthal, ekin, eht, esr, eself, &
epseu, enl, exc, vave
@ -96,8 +107,6 @@ MODULE cp_restart_new
USE qexsd_module, ONLY: qexsd_openschema, qexsd_closeschema, qexsd_xf, &
qexsd_add_all_clocks
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: ndw !
LOGICAL, INTENT(IN) :: ascii !
INTEGER, INTENT(IN) :: nfi ! index of the current step
@ -363,8 +372,9 @@ MODULE cp_restart_new
! ... MAGNETIZATION
!-------------------------------------------------------------------------------
!
CALL qexsd_init_magnetization(output_obj%magnetization, lsda, .false.,&
.false., 0.0_dp, [0.0_dp,0.0_dp, 0.0_dp], 0.0_dp, .false.)
CALL qexsd_init_magnetization(output_obj%magnetization, LSDA = lsda, NONCOLIN = .false.,&
SPINORBIT = .false., ABSOLUTE_MAG = 0.d0, ATM = atm, ITYP = ityp )
output_obj%magnetization_ispresent = lsda
!
!-------------------------------------------------------------------------------
! ... TOTAL ENERGY
@ -555,8 +565,6 @@ MODULE cp_restart_new
ekincm, c02, cm2, wfc )
!------------------------------------------------------------------------
!
USE FoX_dom, ONLY : parseFile, destroy, item, getElementsByTagname,&
Node
USE control_flags, ONLY : gamma_only, force_pairing, llondon,&
ts_vdw, mbd_vdw, lxdm, iverbosity, lwf
USE run_info, ONLY : title
@ -568,7 +576,7 @@ MODULE cp_restart_new
ityp, ions_cofmass
USE gvect, ONLY : ig_l2g, mill
USE cp_main_variables, ONLY : nprint_nfi
USE ldaU_cp, ONLY : lda_plus_U, ns, Hubbard_l, &
USE ldaU_cp, ONLY : lda_plus_U, ns, Hubbard_l, Hubbard_n, &
Hubbard_lmax, Hubbard_U
USE mp, ONLY : mp_sum, mp_bcast
USE mp_global, ONLY : nproc_file, nproc_pool_file, &
@ -586,8 +594,6 @@ MODULE cp_restart_new
qexsd_copy_atomic_species, qexsd_copy_atomic_structure, &
qexsd_copy_basis_set, qexsd_copy_dft, qexsd_copy_band_structure
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: ndr ! I/O unit number
LOGICAL, INTENT(IN) :: ascii !
INTEGER, INTENT(INOUT) :: nfi ! index of the current step
@ -670,19 +676,17 @@ MODULE cp_restart_new
TYPE (general_info_type ) :: geninfo_obj
TYPE (Node),POINTER :: root, nodePointer
CHARACTER(LEN=20) :: dft_name, vdw_corr
CHARACTER(LEN=32) :: exxdiv_treatment, U_projection
CHARACTER(LEN=32) :: exxdiv_treatment, Hubbard_projectors
LOGICAL :: ldftd3
INTEGER :: nq1, nq2, nq3, lda_plus_U_kind
REAL(dp):: exx_fraction, screening_parameter, ecutfock, ecutvcut,local_thr
LOGICAL :: x_gamma_extrapolation
REAL(dp):: hubbard_dum(3,nsp)
REAL(dp):: hubbard_dum(3,nsp), hubba_dum(nsp), hubba_dum_dum(1,1,1)
LOGICAL :: backall_dum(nsp)
INTEGER :: hub_l2_dum(nsp), hub_l3_dum(nsp), hub_lmax_back_dum
CHARACTER(LEN=6), EXTERNAL :: int_to_char
INTEGER, POINTER :: hub_l_back_dum(:), hub_l1_back_dum(:), hub_lmax_back_dum
LOGICAL, POINTER :: backall_dum(:)
REAL(dp), POINTER :: hubba_dum(:)
!
!
NULLIFY (hub_l_back_dum, hub_l1_back_dum, hub_lmax_back_dum, backall_dum, hubba_dum)
dirname = restart_dir(ndr)
filename= xmlfile(ndr)
INQUIRE ( file=filename, exist=found )
@ -778,15 +782,15 @@ MODULE cp_restart_new
CALL qexsd_copy_dft ( output_obj%dft, nsp, atm, dft_name, &
nq1, nq2, nq3, ecutfock, exx_fraction, screening_parameter, &
exxdiv_treatment, x_gamma_extrapolation, ecutvcut, local_thr, &
lda_plus_U, lda_plus_U_kind, U_projection, Hubbard_l, Hubbard_lmax,&
hub_l_back_dum, hub_l1_back_dum, backall_dum, hub_lmax_back_dum, hubba_dum, &
lda_plus_U, lda_plus_U_kind, Hubbard_projectors, Hubbard_n, Hubbard_l, Hubbard_lmax,&
hub_l2_dum, hub_l2_dum, hub_l2_dum, hub_l2_dum, backall_dum, hub_lmax_back_dum, hubba_dum, &
Hubbard_U, hubba_dum, Hubbard_dum(1,:), Hubbard_dum(2,:), Hubbard_dum(3,:), &
Hubbard_dum, &
vdw_corr, scal6, lon_rcut, vdw_isolated)
HUBBARD_J = Hubbard_dum, HUBBARD_V = hubba_dum_dum, VDW_CORR = vdw_corr, SCAL6 = scal6, &
LON_RCUT =lon_rcut, VDW_ISOLATED = vdw_isolated )
CALL set_vdw_corr (vdw_corr, llondon, ldftd3, ts_vdw, mbd_vdw, lxdm )
IF ( ldftd3 ) CALL errore('cp_readfile','DFT-D3 not implemented',1)
!
lsda_ = output_obj%magnetization%lsda
lsda_ = output_obj%magnetization_ispresent .AND. output_obj%magnetization%lsda
IF ( lsda_ .AND. (nspin /= 2) ) CALL errore('cp_readfile','wrong spin',1)
!
nbnd_ = nupdwn(1)
@ -841,11 +845,8 @@ MODULE cp_restart_new
!------------------------------------------------------------------------
! ... Cell related variables, CP-specific
!
USE FoX_wxml
USE ions_base, ONLY: nat
!
IMPLICIT NONE
!
TYPE(xmlf_t), INTENT(INOUT) :: xf
INTEGER, INTENT(IN) :: nfi ! index of the current step
REAL(DP), INTENT(IN) :: simtime ! simulated time
@ -1071,7 +1072,6 @@ MODULE cp_restart_new
!
USE kinds, ONLY : dp
USE io_global, ONLY : ionode
USE FoX_wxml
USE cell_base, ONLY : ainv ! what is this? what is the relation with h?
!
REAL(DP), INTENT(IN) :: h(:,:), wfc(:,:)
@ -1126,8 +1126,6 @@ MODULE cp_restart_new
USE gvecw, ONLY : ngw, ngw_g
USE gvect, ONLY : ig_l2g
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: ndr
INTEGER, INTENT(IN) :: ik, iss, nk, nspin
CHARACTER, INTENT(IN) :: tag
@ -1180,9 +1178,6 @@ MODULE cp_restart_new
! ... ierr = 1: error reading MD status
! ... ierr = 2: error reading timestep info
!
USE FoX_dom
!
IMPLICIT NONE
!
TYPE(Node),POINTER,INTENT(IN) :: root
INTEGER, INTENT(IN) :: nat
@ -1395,7 +1390,6 @@ MODULE cp_restart_new
!
USE kinds, ONLY : dp
USE io_global, ONLY : stdout
USE FoX_dom
!
TYPE(Node), POINTER, INTENT(IN) :: root
REAL(DP), INTENT(OUT):: wfc(:,:)
@ -1436,13 +1430,9 @@ MODULE cp_restart_new
!
USE parameters, ONLY : ntypx
USE ions_base, ONLY : nat
USE FoX_dom, ONLY : Node, parseFile, item, getElementsByTagname, extractDataAttribute, &
extractDataContent, destroy
USE qexsd_copy, ONLY : qexsd_copy_atomic_structure
USE qes_read_module, ONLY : qes_read
!
IMPLICIT NONE
!
INTEGER, INTENT(IN) :: ndr
LOGICAL, INTENT(IN) :: ascii
REAL(DP), INTENT(INOUT) :: ht(3,3)
@ -1584,7 +1574,6 @@ MODULE cp_restart_new
USE io_global, ONLY : ionode, ionode_id
USE cp_main_variables, ONLY : idesc
!
IMPLICIT NONE
CHARACTER(LEN=*), INTENT(in) :: filename
INTEGER, INTENT(in) :: iunpun, iss, nspin, nudx
REAL(dp), INTENT(in) :: lambda(:,:)
@ -1624,8 +1613,6 @@ MODULE cp_restart_new
USE io_global, ONLY : ionode, ionode_id
USE cp_main_variables, ONLY : idesc
!
IMPLICIT NONE
include 'laxlib.fh'
CHARACTER(LEN=*), INTENT(in) :: filename
@ -1670,8 +1657,6 @@ MODULE cp_restart_new
USE cp_main_variables, ONLY : idesc
USE electrons_base,ONLY: nspin, nudx
!
IMPLICIT NONE
include 'laxlib.fh'
REAL(dp), INTENT(in) :: mat_z(:,:,:)
@ -1721,8 +1706,6 @@ MODULE cp_restart_new
USE cp_main_variables, ONLY : idesc
USE electrons_base,ONLY: nspin, nudx
!
IMPLICIT NONE
include 'laxlib.fh'
REAL(dp), INTENT(out) :: mat_z(:,:,:)

View File

@ -1676,7 +1676,7 @@ end subroutine dylmr2_
USE constants, ONLY: pi, fpi
USE gvecw, ONLY: ngw
USE gvect, ONLY: gstart
USE gvecw, ONLY: g2kin_d
USE gvecw, ONLY: g2kin
USE mp, ONLY: mp_sum
USE mp_global, ONLY: intra_bgrp_comm
USE cell_base, ONLY: tpiba2
@ -1696,10 +1696,10 @@ end subroutine dylmr2_
REAL(DP) :: sk
!
sk=0.0d0
!$cuf kernel do(2) <<<*,*>>>
!$acc parallel loop collapse(2) present(g2kin, f, c)
DO i=1,n
DO ig=gstart,ngw
sk = sk + f(i) * DBLE(CONJG(c(ig,i))*c(ig,i)) * g2kin_d(ig)
sk = sk + f(i) * DBLE(CONJG(c(ig,i))*c(ig,i)) * g2kin(ig)
END DO
END DO

View File

@ -23,7 +23,6 @@ SUBROUTINE cprmain( tau_out, fion_out, etot_out )
!autopilot work
USE core, ONLY : rhoc
USE uspp_param, ONLY : nhm, nh
USE pseudo_base, ONLY : vkb_d
USE uspp, ONLY : nkb, vkb, becsum, deeq, okvan, nlcc_any
USE energies, ONLY : eht, epseu, exc, etot, eself, enl, &
ekin, atot, entropy, egrand, enthal, &
@ -123,6 +122,11 @@ USE cp_main_variables, ONLY : eigr_d
USE xc_lib, ONLY : xclib_dft_is, start_exx, exx_is_active
USE device_memcpy_m, ONLY : dev_memcpy
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : update_environ_ions
#endif
!
IMPLICIT NONE
!
include 'laxlib.fh'
@ -314,7 +318,9 @@ USE cp_main_variables, ONLY : eigr_d
!
! ... pass ions information to plugins
!
CALL plugin_init_ions( tau0 )
#if defined (__ENVIRON)
IF (use_environ) CALL update_environ_ions(tau0)
#endif
!
IF ( lda_plus_u ) then
! forceh ! Forces on ions due to Hubbard U
@ -538,9 +544,8 @@ USE cp_main_variables, ONLY : eigr_d
! ... prefor calculates vkb
!
CALL prefor( eigr, vkb )
#if defined(__CUDA)
CALL dev_memcpy( vkb_d, vkb )
#endif
!
!$acc update device(vkb)
!
END IF
!
@ -558,7 +563,11 @@ USE cp_main_variables, ONLY : eigr_d
IF ( tortho ) THEN
!
#if defined (__CUDA)
CALL ortho( vkb_d, cm_d, phi, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL ortho( vkb, cm_d, phi, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
!$acc end host_data
!$acc end data
#else
CALL ortho( vkb, cm_bgrp, phi, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
#endif
@ -597,8 +606,11 @@ USE cp_main_variables, ONLY : eigr_d
#if defined (__CUDA)
CALL dev_memcpy( eigr_d, eigr )
!CALL dev_memcpy( cm_d, cm_bgrp )
CALL calbec( nbsp_bgrp, vkb_d, cm_d, bec_d, 1 )
!CALL dev_memcpy( vkb, vkb_d )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL calbec( nbsp_bgrp, vkb, cm_d, bec_d, 1 )
!$acc end host_data
!$acc end data
!CALL dev_memcpy( bec_bgrp, bec_d )
#else
CALL calbec( nbsp_bgrp, vkb, cm_bgrp, bec_bgrp, 1 )
@ -612,8 +624,8 @@ USE cp_main_variables, ONLY : eigr_d
#endif
END IF
!
!$acc update host(vkb)
#if defined (__CUDA)
CALL dev_memcpy( vkb, vkb_d )
CALL dev_memcpy( bec_bgrp, bec_d )
#endif
!
@ -845,7 +857,9 @@ USE cp_main_variables, ONLY : eigr_d
IF ( tefield ) CALL efield_update( eigr )
IF ( tefield2 ) CALL efield_update2( eigr )
!
CALL plugin_init_ions( tau0 )
#if defined (__ENVIRON)
IF (use_environ) CALL update_environ_ions(tau0)
#endif
!
lambdam = lambda
!
@ -1018,6 +1032,11 @@ SUBROUTINE terminate_run()
USE exx_module, ONLY : exx_finalize
USE xc_lib, ONLY : xclib_dft_is, exx_is_active
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : print_environ_clocks
#endif
!
IMPLICIT NONE
!
! ... print statistics
@ -1166,7 +1185,9 @@ SUBROUTINE terminate_run()
!
IF (tcg) call print_clock_tcg()
!
CALL plugin_clock()
#if defined (__ENVIRON)
IF (use_environ) CALL print_environ_clocks()
#endif
!
CALL mp_report()
!

View File

@ -20,7 +20,7 @@ PROGRAM main
!
USE input, ONLY : iosys_pseudo, iosys
USE io_global, ONLY : ionode, ionode_id
USE environment, ONLY : environment_start
USE environment, ONLY : environment_start, print_cuda_info
USE check_stop, ONLY : check_stop_init
USE mp_global, ONLY : mp_startup
USE mp_images, ONLY : intra_image_comm
@ -38,6 +38,7 @@ PROGRAM main
! ... start the environment
!
CALL environment_start( 'CP' )
CALL print_cuda_info()
!
! reading plugin arguments
!

View File

@ -12,6 +12,11 @@
USE kinds
USE xc_lib, ONLY : xclib_dft_is, xclib_get_exx_fraction
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : print_environ_energies
#endif
IMPLICIT NONE
SAVE
@ -198,7 +203,9 @@
IF( textfor ) WRITE( stdout, 16 ) eextfor
END IF
!
CALL plugin_print_energies()
#if defined (__ENVIRON)
IF (use_environ) CALL print_environ_energies('CP')
#endif
!
1 FORMAT(6X,' total energy = ',F18.10,' Hartree a.u.')
2 FORMAT(6X,' kinetic energy = ',F18.10,' Hartree a.u.')

View File

@ -0,0 +1,118 @@
!----------------------------------------------------------------------------------------
!>
!!
!----------------------------------------------------------------------------------------
MODULE environ_cp_module
!------------------------------------------------------------------------------------
#if defined (__ENVIRON)
!
USE environ_base_module, ONLY: environ
!
USE kinds, ONLY: DP
USE global_version, ONLY: version_number
USE io_global, ONLY: stdout
!
USE fft_base, ONLY: dfftp
!
USE electrons_base, ONLY: nspin, nelt
!
!------------------------------------------------------------------------------------
!
IMPLICIT NONE
!
PRIVATE
!
PUBLIC :: add_environ_potential, calc_environ_potential
!
!------------------------------------------------------------------------------------
CONTAINS
!------------------------------------------------------------------------------------
!>
!!
!------------------------------------------------------------------------------------
SUBROUTINE add_environ_potential(v)
!--------------------------------------------------------------------------------
!
IMPLICIT NONE
!
REAL(DP), INTENT(INOUT) :: v(dfftp%nnr, nspin)
!
INTEGER :: i
REAL(DP) :: dvtot(dfftp%nnr)
!
CHARACTER(LEN=80) :: routine = 'add_environ_potential'
!
!--------------------------------------------------------------------------------
!
dvtot = 0.5D0 * environ%main%get_dvtot(dfftp%nnr) ! Rydberg to Hartree
!
IF (nspin .EQ. 1) THEN
!
!$omp parallel do
DO i = 1, dfftp%nnr
v(i, 1) = v(i, 1) + dvtot(i)
END DO
!$omp end parallel do
!
ELSE IF (nspin .EQ. 2) THEN
!
!$omp parallel do
DO i = 1, dfftp%nnr
v(i, 1) = v(i, 1) + dvtot(i)
v(i, 2) = v(i, 2) + dvtot(i)
END DO
!$omp end parallel do
!
END IF
!
!--------------------------------------------------------------------------------
END SUBROUTINE add_environ_potential
!------------------------------------------------------------------------------------
!>
!!
!------------------------------------------------------------------------------------
SUBROUTINE calc_environ_potential(rhoin, nfi)
!--------------------------------------------------------------------------------
!
IMPLICIT NONE
!
REAL(DP), INTENT(IN) :: rhoin(dfftp%nnr, nspin)
INTEGER, INTENT(IN) :: nfi
!
LOGICAL :: update_venviron = .FALSE.
REAL(DP) :: rhoaux(dfftp%nnr)
!
CHARACTER(LEN=80) :: routine = 'calc_environ_potential'
!
!--------------------------------------------------------------------------------
! update electrons-related quantities in environ
!
rhoaux = rhoin(:, 1)
!
IF (version_number == '6.3') THEN
IF (nspin == 2) rhoaux = rhoaux + rhoin(:, 2)
END IF
!
CALL environ%main%update_electrons(dfftp%nnr, rhoaux, REAL(nelt, DP))
!
!--------------------------------------------------------------------------------
! Environ contribution to the local potential
!
update_venviron = nfi > environ%setup%get_nskip() .OR. environ%setup%is_restart()
!
IF (update_venviron .AND. environ%get_verbosity() > 1) WRITE (stdout, 1000)
!
CALL environ%calc%potential(update_venviron)
!
!--------------------------------------------------------------------------------
!
1000 FORMAT(/, 5X, "add environment contribution to local potential")
!
!--------------------------------------------------------------------------------
END SUBROUTINE calc_environ_potential
!------------------------------------------------------------------------------------
!
#endif
!------------------------------------------------------------------------------------
END MODULE environ_cp_module
!----------------------------------------------------------------------------------------

View File

@ -358,7 +358,7 @@
USE uspp_param, ONLY: nhm, nh
USE constants, ONLY: pi, fpi
USE ions_base, ONLY: nsp, na, nat, ityp
USE gvecw, ONLY: ngw, g2kin_d
USE gvecw, ONLY: ngw, g2kin
USE cell_base, ONLY: tpiba2
USE ensemble_dft, ONLY: tens
USE xc_lib, ONLY: xclib_dft_is, exx_is_active
@ -492,10 +492,10 @@
fip = -0.5d0*f(i+idx)
endif
CALL fftx_psi2c_gamma_gpu( dffts, psi( 1+ioff : ioff+dffts%nnr ), df_d(1+igno:igno+ngw), da_d(1+igno:igno+ngw))
!$cuf kernel do(1)
!$acc parallel loop present(g2kin, da_d, df_d, c)
DO ig=1,ngw
df_d(ig+igno)= fi*(tpiba2*g2kin_d(ig)* c(ig,idx+i-1)+df_d(ig+igno))
da_d(ig+igno)=fip*(tpiba2*g2kin_d(ig)* c(ig,idx+i )+da_d(ig+igno))
df_d(ig+igno)= fi*(tpiba2*g2kin(ig)* c(ig,idx+i-1)+df_d(ig+igno))
da_d(ig+igno)=fip*(tpiba2*g2kin(ig)* c(ig,idx+i )+da_d(ig+igno))
END DO
END IF

View File

@ -29,7 +29,6 @@ SUBROUTINE from_scratch( )
USE energies, ONLY : entropy, eself, enl, ekin, enthal, etot, ekincm
USE energies, ONLY : dft_energy_type, debug_energies
USE dener, ONLY : denl, denl6, dekin6, detot
USE pseudo_base, ONLY : vkb_d
USE uspp, ONLY : vkb, becsum, deeq, nkb, okvan, nlcc_any
USE io_global, ONLY : stdout, ionode
USE core, ONLY : rhoc
@ -62,6 +61,11 @@ SUBROUTINE from_scratch( )
USE matrix_inversion
USE device_memcpy_m, ONLY : dev_memcpy
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : update_environ_ions
#endif
!
IMPLICIT NONE
!
@ -125,7 +129,9 @@ SUBROUTINE from_scratch( )
!
! pass ions informations to plugins
!
CALL plugin_init_ions( tau0 )
#if defined (__ENVIRON)
IF (use_environ) CALL update_environ_ions(tau0)
#endif
!
! wfc initialization with random numbers
!
@ -137,9 +143,8 @@ SUBROUTINE from_scratch( )
! ... prefor calculates vkb (used by gram)
!
CALL prefor( eigr, vkb )
#if defined(__CUDA)
CALL dev_memcpy( vkb_d, vkb )
#endif
!
!$acc update device(vkb)
!
nspin_wfc = nspin
IF( force_pairing ) nspin_wfc = 1
@ -263,7 +268,11 @@ SUBROUTINE from_scratch( )
!
IF( ttforce ) THEN
#if defined (__CUDA)
CALL nlfq_bgrp( cm_d, vkb_d, bec_bgrp, becdr_bgrp, fion )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL nlfq_bgrp( cm_d, vkb, bec_bgrp, becdr_bgrp, fion )
!$acc end host_data
!$acc end data
#else
CALL nlfq_bgrp( cm_bgrp, vkb, bec_bgrp, becdr_bgrp, fion )
#endif
@ -273,7 +282,11 @@ SUBROUTINE from_scratch( )
! the electron mass rises with g**2
!
#if defined (__CUDA)
CALL calphi_bgrp( cm_d, ngw, bec_bgrp, nkb, vkb_d, phi, nbspx_bgrp, ema0bg )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL calphi_bgrp( cm_d, ngw, bec_bgrp, nkb, vkb, phi, nbspx_bgrp, ema0bg )
!$acc end host_data
!$acc end data
#else
CALL calphi_bgrp( cm_bgrp, ngw, bec_bgrp, nkb, vkb, phi, nbspx_bgrp, ema0bg )
#endif
@ -285,7 +298,11 @@ SUBROUTINE from_scratch( )
!
if( tortho ) then
#if defined (__CUDA)
CALL ortho( vkb_d, c0_d, phi, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL ortho( vkb, c0_d, phi, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
!$acc end host_data
!$acc end data
#else
CALL ortho( vkb, c0_bgrp, phi, lambda, idesc, bigr, iter, ccc, bephi, becp_bgrp )
#endif

View File

@ -5,22 +5,7 @@
! in the root directory of the present distribution,
! or http://www.gnu.org/copyleft/gpl.txt .
!
#if defined (_OPENACC)
#ifndef __OPENACC
#define __OPENACC
#endif
#endif
#if defined (__OPENACC)
#define DEV_ACC !$acc
#define DEV_OMP !!!
#define START_WSHARE DEV_ACC kernels
#define END_WSHARE DEV_ACC end kernels
#else
#define DEV_ACC !!!
#define DEV_OMP !$omp
#define START_WSHARE DEV_OMP workshare
#define END_WSHARE DEV_OMP end workshare
#endif
#include <cpv_device_macros.h>
!-------------------------------------------------------------------------
SUBROUTINE gram_bgrp( betae, bec_bgrp, nkbx, cp_bgrp, ngwx )
!-----------------------------------------------------------------------
@ -70,7 +55,7 @@ SUBROUTINE gram_bgrp( betae, bec_bgrp, nkbx, cp_bgrp, ngwx )
ALLOCATE( bec_tmp( nkbx ) )
ALLOCATE( csc2( SIZE( csc ) ) )
!
CALL set_uspp_stuff(tvanp, all_tvanp, nqq, iqq, nsp, upf, nh, qq_nt)
tvanp(1:nsp) = upf(1:nsp)%tvanp
CALL block_distribute(nat, me_bgrp, nproc_bgrp, ia_s, ia_e, mykey)
DEV_ACC data copy(cp_bgrp, bec_bgrp) create(ctmp, cp_tmp,bec_tmp,csc,csc2) &
DEV_ACC & copyin(betae, qq_nt, ofsbeta,ityp, ibgrp_g2l, nh, tvanp )
@ -94,7 +79,7 @@ DEV_ACC end kernels
END IF
!
IF( nbgrp_im1 > 0 .AND. ngw > 0 ) THEN
#if defined (__OPENACC)
#if defined (__CUDA) && defined (_OPENACC)
DEV_ACC host_data use_device(cp_bgrp, csc, ctmp)
CALL mydgemv( 'N', 2*ngw, nbgrp_im1, mone, cp_bgrp(1,iupdwn_bgrp(iss)), 2*ngwx, csc, 1, one, ctmp, 1 )
DEV_ACC end host_data
@ -123,46 +108,12 @@ DEV_ACC end data
DEALLOCATE( csc2 )
DEALLOCATE( bec_tmp )
DEALLOCATE( cp_tmp )
DEALLOCATE (iqq)
CALL stop_clock( 'gram' )
!
RETURN
CONTAINS
!-----------------------------------------------------------------------
SUBROUTINE set_uspp_stuff(tvanp, all_tvanp, nqq, iqq, nsp, upf, nh, qq_nt)
!-----------------------------------------------------------------------
USE pseudo_types, ONLY: pseudo_upf
IMPLICIT NONE
INTEGER,INTENT(IN) :: nsp
TYPE(PSEUDO_UPF),INTENT(IN) :: upf(nsp)
INTEGER,INTENT(IN) :: nh(nsp)
REAL(DP),INTENT(IN) :: qq_nt(:,:,:)
LOGICAL,INTENT(OUT) :: tvanp(nsp)
LOGICAL,INTENT(OUT) :: all_tvanp
INTEGER,ALLOCATABLE,INTENT(OUT) :: nqq(:)
INTEGER,ALLOCATABLE :: iqq(:,:)
!
INTEGER :: nhx,is,iv, jv
nhx = MAXVAL(nh(1:nsp))
ALLOCATE (iqq(2,nhx*nhx),nqq(nsp))
tvanp(1:nsp) = upf(1:nsp)%tvanp
all_tvanp = ALL(tvanp(1:nsp))
nqq = 0
DO is = 1, nsp
DO iv = 1, nh(is)
DO jv =1, nh(is)
IF (ABS(qq_nt(iv,jv,is)) .GT. 1.e-5) THEN
nqq(is) = nqq(is) + 1
iqq(:,nqq(is)) =[iv,jv]
END IF
END DO
END DO
END DO
END SUBROUTINE set_uspp_stuff
CONTAINS
!-----------------------------------------------------------------------
FUNCTION cscnorm( bec, cp, i, n )
!-----------------------------------------------------------------------
@ -190,7 +141,7 @@ CONTAINS
REAL(DP), EXTERNAL :: myddot
!
DEV_ACC data present(bec, cp, tvanp,ofsbeta, nh, ityp, qq_nt)
#if defined(__OPENACC)
#if defined(__CUDA) && defined(_OPENACC)
DEV_ACC host_data use_device(cp)
rsum = 2.d0 * myddot(2*ngw,cp(1,i),1,cp(1,i),1)
DEV_ACC end host_data
@ -293,7 +244,7 @@ DEV_ACC end host_data
kmax_bgrp = kmax_bgrp - iupdwn_bgrp(iss) + 1
IF( kmax_bgrp > 0 .AND. ngw > 0 ) THEN
#if defined(__OPENACC)
#if defined(__CUDA) && defined (_OPENACC)
DEV_ACC host_data use_device(cp_bgrp, cp_tmp, csc2)
CALL mydgemv( 'T', 2*ngw, kmax_bgrp, 1.0d0, cp_bgrp(1,iupdwn_bgrp(iss)), 2*ngwx, cp_tmp, 1, 0.0d0, csc2, 1 )
DEV_ACC end host_data
@ -434,7 +385,7 @@ DEV_ACC serial present(ibgrp_g2l, csc)
DEV_ACC end serial
IF( nk > 0 .AND. ngw > 0 ) THEN
#if defined (__OPENACC)
#if defined (__CUDA) && (_OPENACC)
DEV_ACC data copyin(bec_bgrp, csc) copyout(bec_tmp)
DEV_ACC host_data use_device(bec_bgrp, csc, bec_tmp)
CALL mydgemv( 'N', nkbx, nk, -1.0d0, bec_bgrp(1,iupdwn_bgrp(iss)), nkbx, csc, 1, 0.0d0, bec_tmp, 1 )

View File

@ -400,6 +400,11 @@
USE smallbox_subs, ONLY : gcalb
USE io_global, ONLY : stdout, ionode
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : update_environ_cell
#endif
!
implicit none
!
REAL(DP), INTENT(IN) :: h(3,3)
@ -408,6 +413,10 @@
REAL(DP) :: rat1, rat2, rat3
INTEGER :: ig, dfftp_ngm
!
#if defined (__ENVIRON)
REAL(DP) :: at_scaled(3, 3)
#endif
!
!WRITE( stdout, "(4x,'h from newinit')" )
!do i=1,3
! WRITE( stdout, '(3(4x,f12.7)' ) (h(i,j),j=1,3)
@ -443,7 +452,12 @@
!
! pass new cell parameters to plugins
!
CALL plugin_init_cell( )
#if defined (__ENVIRON)
IF (use_environ) THEN
at_scaled = at * alat
CALL update_environ_cell(at_scaled)
END IF
#endif
!
return
end subroutine newinit_x

View File

@ -23,10 +23,9 @@ SUBROUTINE init_run()
vels, velsm, velsp, fion, fionm
USE gvecw, ONLY : ngw, ngw_g, g2kin, g2kin_init
USE smallbox_gvec, ONLY : ngb
USE gvect, ONLY : gstart, gg
USE gvect, ONLY : gstart, gg, gcutm
USE fft_base, ONLY : dfftp, dffts
USE electrons_base, ONLY : nspin, nbsp, nbspx, nupdwn, f
USE pseudo_base, ONLY : vkb_d
USE uspp, ONLY : nkb, vkb, deeq, becsum,nkbus
USE core, ONLY : rhoc
USE wavefunctions, ONLY : c0_bgrp, cm_bgrp, allocate_cp_wavefunctions
@ -84,6 +83,11 @@ SUBROUTINE init_run()
USE exx_module, ONLY : exx_initialize
#if defined (__CUDA)
USE cudafor
#endif
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : init_environ_base
#endif
!
IMPLICIT NONE
@ -93,6 +97,10 @@ SUBROUTINE init_run()
REAL(DP) :: a1(3), a2(3), a3(3)
LOGICAL :: ftest
!
#if defined (__ENVIRON)
REAL(DP) :: at_scaled(3, 3)
REAL(DP) :: gcutm_scaled
#endif
!
CALL start_clock( 'initialize' )
!
@ -124,7 +132,13 @@ SUBROUTINE init_run()
!
! ... initialization of plugin variables and arrays
!
CALL plugin_init_base()
#if defined (__ENVIRON)
IF (use_environ) THEN
at_scaled = at * alat
gcutm_scaled = gcutm / alat**2
CALL init_environ_base(at_scaled, gcutm_scaled)
END IF
#endif
!
! ... initialize atomic positions and cell
!
@ -215,9 +229,7 @@ SUBROUTINE init_run()
ALLOCATE( deeq( nhm, nhm, nat, nspin ) )
!
ALLOCATE( vkb( ngw, nkb ) )
#if defined(_CUDA)
ALLOCATE( vkb_d( ngw, nkb ) )
#endif
!$acc enter data create(vkb(1:ngw,1:nkb))
!
IF ( xclib_dft_is('meta') .AND. tens ) &
CALL errore( ' init_run ', 'ensemble_dft not implemented for metaGGA', 1 )

View File

@ -215,6 +215,11 @@ MODULE input
memory, ref_cell, tcpbo, max_seconds, pre_state
USE xc_lib, ONLY : xclib_dft_is
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : read_environ_input, init_environ_setup
#endif
!
IMPLICIT NONE
!
!
@ -693,7 +698,12 @@ MODULE input
! ... having set all input keywords, read plugins' input file(s)
CALL plugin_read_input()
#if defined (__ENVIRON)
IF (use_environ) THEN
CALL read_environ_input()
CALL init_environ_setup()
END IF
#endif
!
! ... the 'ATOMIC_SPECIES' card must be present, check it
@ -763,10 +773,11 @@ MODULE input
step_rad, Surf_t, dthr, R_j, h_j, &
delta_eps, delta_sigma, n_cntr, &
axis
USE input_parameters, ONLY : lda_plus_u, Hubbard_U
USE input_parameters, ONLY : lda_plus_u, Hubbard_U, Hubbard_l, Hubbard_n
USE input_parameters, ONLY : step_pen, A_pen, alpha_pen, sigma_pen
USE input_parameters, ONLY : vdw_corr, london, london_s6, london_rcut, &
ts_vdw, ts_vdw_isolated, ts_vdw_econv_thr
USE input_parameters, ONLY : exx_fraction, screening_parameter
!
USE constants, ONLY : amu_au, pi
USE control_flags, ONLY : lconstrain, tpre, thdyn, tksw
@ -796,6 +807,7 @@ MODULE input
USE control_flags, ONLY : llondon, ts_vdw_ => ts_vdw
USE london_module, ONLY : init_london, scal6, lon_rcut
USE tsvdw_module, ONLY : vdw_isolated, vdw_econv_thr
USE xc_lib, ONLY : xclib_set_exx_fraction, set_screening_parameter
!
IMPLICIT NONE
!
@ -931,7 +943,7 @@ MODULE input
!
! ... initialize variables for lda+U calculations
!
CALL ldaU_init0 ( ntyp, lda_plus_u, Hubbard_U )
CALL ldaU_init0 ( ntyp, lda_plus_u, Hubbard_U, Hubbard_l, Hubbard_n )
CALL ldaUpen_init( SIZE(sigma_pen), step_pen, sigma_pen, alpha_pen, A_pen )
!
! ... initialize variables for vdW (dispersions) corrections
@ -974,6 +986,14 @@ MODULE input
vdw_econv_thr= ts_vdw_econv_thr
END IF
!
! ... must be done AFTER dft is read from PP files and initialized
! ... or else the two following parameters will be overwritten
!
IF (exx_fraction >= 0.0_DP) CALL xclib_set_exx_fraction (exx_fraction)
!
IF (screening_parameter >= 0.0_DP) &
& CALL set_screening_parameter(screening_parameter)
!
RETURN
!
END SUBROUTINE modules_setup
@ -1039,6 +1059,10 @@ MODULE input
USE io_global, ONLY: ionode, stdout
USE time_step, ONLY: delt
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : print_environ_summary
#endif
!
IMPLICIT NONE
@ -1110,7 +1134,9 @@ MODULE input
!
! CALL sic_info() ! maybe useful
!
CALL plugin_print_info( )
#if defined (__ENVIRON)
IF (use_environ) CALL print_environ_summary()
#endif
!
IF(tefield) call efield_info( )
IF(tefield2) call efield_info2( )

View File

@ -71,7 +71,6 @@
!
implicit none
integer is, nb, l
integer, external :: set_hubbard_l
IF ( .NOT.lda_plus_u ) RETURN
! FIXME: wasteful allocation, should be removed
@ -81,7 +80,7 @@
Hubbard_lmax = -1
do is=1,nsp
if (Hubbard_U(is).ne.0.d0) then
Hubbard_l(is) = set_hubbard_l( upf(is)%psd )
! Hubbard_l is read from the HUBBARD card in the input file
Hubbard_lmax = max(Hubbard_lmax,Hubbard_l(is))
write (6,*) ' HUBBARD L FOR TYPE ',atm(is),' IS ', Hubbard_l(is)
else

View File

@ -15,13 +15,13 @@ MODULE ldaU_cp
real(DP) :: Hubbard_U(nsx)
real(DP) :: e_hubbard = 0.d0
real(DP), allocatable :: ns(:,:,:,:)
integer :: Hubbard_l(nsx), Hubbard_lmax=0, ldmx=0, nwfcU
integer :: Hubbard_l(nsx), Hubbard_n(nsx), Hubbard_lmax=0, ldmx=0, nwfcU
logical :: lda_plus_u
COMPLEX(DP), allocatable:: vupsi(:,:)
!
contains
!
subroutine ldaU_init0 ( nsp, lda_plus_u_, Hubbard_U_ )
subroutine ldaU_init0 ( nsp, lda_plus_u_, Hubbard_U_, Hubbard_l_, Hubbard_n_ )
!-----------------------------------------------------------------------
!
USE constants, ONLY: autoev
@ -30,9 +30,13 @@ contains
INTEGER, INTENT(IN) :: nsp
LOGICAL, INTENT(IN) :: lda_plus_u_
REAL(DP),INTENT(IN) :: Hubbard_U_(nsp)
INTEGER, INTENT(IN) :: Hubbard_l_(nsp)
INTEGER, INTENT(IN) :: Hubbard_n_(nsp)
lda_plus_u = lda_plus_u_
Hubbard_U(1:nsp) = Hubbard_U_(1:nsp) / autoev
Hubbard_l(1:nsp) = Hubbard_l_(1:nsp)
Hubbard_n(1:nsp) = Hubbard_n_(1:nsp)
!
END SUBROUTINE ldaU_init0
!

View File

@ -20,7 +20,6 @@ SUBROUTINE move_electrons_x( nfi, tprint, tfirst, tlast, b1, b2, b3, fion, &
drhog, sfac, ema0bg, bec_bgrp, becdr_bgrp, &
taub, lambda, lambdam, lambdap, vpot, dbec, idesc
USE cell_base, ONLY : omega, ibrav, h, press
USE pseudo_base, ONLY : vkb_d
USE uspp, ONLY : becsum, vkb, nkb, nlcc_any
USE energies, ONLY : ekin, enl, entropy, etot
USE electrons_base, ONLY : nbsp, nspin, f, nudx, nupdwn, nbspx_bgrp, nbsp_bgrp
@ -157,10 +156,9 @@ SUBROUTINE move_electrons_x( nfi, tprint, tfirst, tlast, b1, b2, b3, fion, &
!
CALL newd( vpot, becsum, fion, tprint )
!
CALL prefor( eigr, vkb )
#if defined(__CUDA)
CALL dev_memcpy( vkb_d, vkb )
#endif
CALL prefor( eigr, vkb )
!
!$acc update device(vkb)
!
IF( force_pairing ) THEN
!
@ -185,7 +183,11 @@ SUBROUTINE move_electrons_x( nfi, tprint, tfirst, tlast, b1, b2, b3, fion, &
!
IF ( tfor .OR. ( tprnfor .AND. tprint ) ) THEN
#if defined (__CUDA)
CALL nlfq_bgrp( c0_d, vkb_d, bec_bgrp, becdr_bgrp, fion )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL nlfq_bgrp( c0_d, vkb, bec_bgrp, becdr_bgrp, fion )
!$acc end host_data
!$acc end data
#else
CALL nlfq_bgrp( c0_bgrp, vkb, bec_bgrp, becdr_bgrp, fion )
#endif
@ -212,7 +214,11 @@ SUBROUTINE move_electrons_x( nfi, tprint, tfirst, tlast, b1, b2, b3, fion, &
! ... the electron mass rises with g**2
!
#if defined (__CUDA)
CALL calphi_bgrp( c0_d, ngw, bec_bgrp, nkb, vkb_d, phi, nbspx_bgrp, ema0bg )
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL calphi_bgrp( c0_d, ngw, bec_bgrp, nkb, vkb, phi, nbspx_bgrp, ema0bg )
!$acc end host_data
!$acc end data
#else
CALL calphi_bgrp( c0_bgrp, ngw, bec_bgrp, nkb, vkb, phi, nbspx_bgrp, ema0bg )
#endif

View File

@ -357,7 +357,7 @@
!
DEV_ACC data present(rhoeg, rhops, mill,g ) copy(fion) create(rp(s_ngm_)) copyin(sfac, screen_coul, gg, vps, ityp,ei1, ei2, ei3)
DEV_ACC data present(rhoeg, rhops, mill,g ) copy(fion) create(rp(1:s_ngm_)) copyin(sfac, screen_coul, gg, vps, ityp,ei1, ei2, ei3)
!
DEV_OMP parallel default(none) &
DEV_OMP shared(gstart, dffts,sfac, rhops, screen_coul, rhoeg, nsp, gg, tpiba2, tpiba, mill, g, &

View File

@ -24,21 +24,6 @@
PUBLIC :: compute_rhops, formfn, formfa
PUBLIC :: compute_eself, compute_rhocg
!FIXME device variable added here because cp implementation
! differs from the one of PW. Work yet to be done to remove it
! from cp
PUBLIC :: vkb_d
COMPLEX(DP), ALLOCATABLE :: vkb_d(:,:)
#if defined(__CUDA)
attributes (DEVICE) :: vkb_d
#endif
!=----------------------------------------------------------------------------=!
CONTAINS
!=----------------------------------------------------------------------------=!

View File

@ -33,7 +33,6 @@ SUBROUTINE from_restart( )
USE wave_base, ONLY : rande_base
USE efield_module, ONLY : efield_berry_setup, tefield, &
efield_berry_setup2, tefield2
USE pseudo_base, ONLY : vkb_d
USE uspp, ONLY : okvan, vkb, nkb, nlcc_any
USE cp_main_variables, ONLY : ht0, htm, lambdap, lambda, lambdam, eigr, &
sfac, taub, irb, eigrb, edft, bec_bgrp, dbec, idesc, iabox, nabox
@ -42,6 +41,11 @@ SUBROUTINE from_restart( )
USE device_memcpy_m, ONLY : dev_memcpy
USE matrix_inversion
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : update_environ_ions
#endif
!
IMPLICIT NONE
INTEGER :: iss
@ -164,9 +168,8 @@ SUBROUTINE from_restart( )
CALL strucf( sfac, eigts1, eigts2, eigts3, mill, dffts%ngm )
!
CALL prefor( eigr, vkb )
#if defined(__CUDA)
CALL dev_memcpy( vkb_d, vkb )
#endif
!
!$acc update device(vkb)
!
CALL formf( .TRUE. , eself )
!
@ -199,7 +202,9 @@ SUBROUTINE from_restart( )
IF ( tefield ) CALL efield_berry_setup( eigr, tau0 )
IF ( tefield2 ) CALL efield_berry_setup2( eigr, tau0 )
!
CALL plugin_init_ions( tau0 )
#if defined (__ENVIRON)
IF (use_environ) CALL update_environ_ions(tau0)
#endif
!
edft%eself = eself
!

View File

@ -46,7 +46,6 @@
USE fft_base, ONLY : dffts
use wave_base, only : wave_steepest, wave_verlet
use control_flags, only : lwf, tsde, many_fft
use pseudo_base, only : vkb_d
use uspp, only : deeq, vkb
use gvect, only : gstart
use electrons_base, only : nbsp_bgrp, ispin_bgrp, f_bgrp , nspin, nupdwn_bgrp, iupdwn_bgrp
@ -276,8 +275,12 @@
ELSE
#if defined (__CUDA)
CALL dforce( i, bec_bgrp, vkb_d, c0_d, c2, c3, rhos_d, &
!$acc data present(vkb)
!$acc host_data use_device(vkb)
CALL dforce( i, bec_bgrp, vkb, c0_d, c2, c3, rhos_d, &
SIZE(rhos_d,1), ispin_bgrp, f_bgrp, nbsp_bgrp, nspin )
!$acc end host_data
!$acc end data
#else
CALL dforce( i, bec_bgrp, vkb, c0_bgrp, c2, c3, rhos, &
SIZE(rhos,1), ispin_bgrp, f_bgrp, nbsp_bgrp, nspin )

View File

@ -17,6 +17,11 @@ SUBROUTINE stop_cp_run()
USE constraints_module, ONLY : deallocate_constraint
USE mp_global, ONLY : mp_global_end
!
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_base_module, ONLY : clean_environ
#endif
!
IMPLICIT NONE
!
CALL environment_end( 'CP' )
@ -25,7 +30,9 @@ SUBROUTINE stop_cp_run()
!
IF ( lconstrain ) CALL deallocate_constraint()
!
CALL plugin_clean()
#if defined (__ENVIRON)
IF (use_environ) CALL clean_environ()
#endif
!
CALL mp_global_end()
!

View File

@ -66,10 +66,16 @@ SUBROUTINE vofrho_x( nfi, rhor, drhor, rhog, drhog, rhos, rhoc, tfirst, &
USE fft_helper_subroutines
USE plugin_variables, ONLY: plugin_etot
#if defined(__OPENACC)
#if defined(__CUDA) && defined(_OPENACC)
USE cublas
#endif
#if defined (__ENVIRON)
USE plugin_flags, ONLY : use_environ
USE environ_cp_module, ONLY : add_environ_potential, calc_environ_potential
USE environ_base_module, ONLY : calc_environ_energy, calc_environ_force
#endif
IMPLICIT NONE
!
LOGICAL :: tlast, tfirst
@ -114,6 +120,10 @@ SUBROUTINE vofrho_x( nfi, rhor, drhor, rhog, drhog, rhos, rhoc, tfirst, &
COMPLEX(DP), PARAMETER :: ci = ( 0.0d0, 1.0d0 )
INTEGER :: p_ngm_, s_ngm_, p_nnr_, s_nnr_
#if defined (__ENVIRON)
REAL(DP), ALLOCATABLE :: force_environ(:, :)
#endif
CALL start_clock( 'vofrho' )
p_ngm_ = dfftp%ngm
s_ngm_ = dffts%ngm
@ -164,15 +174,17 @@ DEV_ACC enter data create(drhot(1:p_ngm_, 1:6))
!
if (abivol.or.abisur) call vol_clu(rhor,rhog,nfi)
!
! compute plugin contributions to the potential, add it later
!
CALL plugin_get_potential(rhor,nfi)
!
! compute plugin contributions to the energy
!
plugin_etot = 0.0_dp
!
CALL plugin_energy(rhor,plugin_etot)
#if defined (__ENVIRON)
IF (use_environ) THEN
! compute plugin contributions to the potential, add it later
CALL calc_environ_potential(rhor, nfi)
! compute plugin contributions to the energy
CALL calc_environ_energy(plugin_etot)
plugin_etot = 0.5D0 * plugin_etot ! Rydberg to Hartree
END IF
#endif
!
ttsic = ( ABS( self_interaction ) /= 0 )
!
@ -495,7 +507,9 @@ DEV_ACC exit data delete(rhotmp)
!
! add plugin contributions to potential here...
!
CALL plugin_add_potential( rhor )
#if defined (__ENVIRON)
IF (use_environ) CALL add_environ_potential(rhor)
#endif
!
!
! rhor contains the xc potential in r-space
@ -509,7 +523,7 @@ DEV_ACC exit data delete(rhotmp)
ELSE
CALL rho_r2g ( dfftp, rhor, rhog )
END IF
DEV_ACC update device (rhog)
!
IF( nspin == 1 ) THEN
CALL zaxpy(p_ngm_, (1.0d0,0.0d0) , vtemp, 1, rhog(:,1), 1)
ELSE
@ -551,7 +565,14 @@ DEV_ACC end data
!
! plugin patches on internal forces
!
CALL plugin_int_forces(fion)
#if defined (__ENVIRON)
IF (use_environ) THEN
ALLOCATE (force_environ(3, nat))
CALL calc_environ_force(force_environ)
fion = fion + 0.5D0 * force_environ ! Environ forces in Ry/bohr
DEALLOCATE (force_environ)
END IF
#endif
END IF

View File

@ -3,7 +3,7 @@ include(UseLATEX)
# By default the manuals are generated and installed in pdf format
# To generat the manuals use the following command: make pdf
# It is possible to generate user_guide and developer_man
# It is possible to generate user_guide
# in HTML format with the following command: make html
set(LATEX_DEFAULT_BUILD "pdf")
@ -30,9 +30,8 @@ add_latex_document(
install(
FILES
${PROJECT_BINARY_DIR}/Doc/user_guide.pdf
${PROJECT_BINARY_DIR}/Doc/developer_man.pdf
${PROJECT_BINARY_DIR}/Doc/constraints_HOWTO.pdf
${PROJECT_BINARY_DIR}/Doc/brillouin_zones.pdf
${PROJECT_BINARY_DIR}/Doc/plumed_quick_ref.pdf
DESTINATION
share/qe)
share/Doc)

682
Doc/Hubbard_input.tex Normal file
View File

@ -0,0 +1,682 @@
\documentclass[12pt,a4paper]{article}
\def\version{7.1}
\def\qe{{\sc Quantum ESPRESSO}}
%\usepackage{html}
% BEWARE: don't revert from graphicx for epsfig, because latex2html
% doesn't handle epsfig commands !!!
\usepackage{graphicx}
\usepackage{alltt}
\usepackage{xcolor}
\textwidth = 17cm
\textheight = 24cm
\topmargin =-1 cm
\oddsidemargin = 0 cm
\def\pw{\texttt{pw.x}}
\def\hp{\texttt{hp.x}}
\begin{document}
\author{}
\date{}
%\def\qeImage{quantum_espresso}
\title{
%\includegraphics[width=5cm]{\qeImage} \\
% title
\Huge New DFT+Hubbard input in \\ \qe\ (since v.\version)
%\vskip 1 cm
%\Large Iurii Timrov (EPFL)
\date{\today}
}
\maketitle
\tableofcontents
\section{History}
\label{sec:history}
Density-functional theory (DFT) with the on-site Hubbard $U$ correction (DFT+$U$) was implemented in \qe\ since the early days of the \qe\ project (early 2000's). In the literature, this method used to be called (and still often is) as ``LDA+$U$'', since in the original paper that first introduced this method the local density approximation (LDA) for the exchange-correlation functional was used~\cite{Anisimov:1991}. However, other functionals other than LDA can be used with the Hubbard correction, and hence we obtain e.g. GGA+$U$, SCAN+$U$, etc. Therefore, it might be confusing to continue using the old name ``LDA+$U$''. Instead, for the sake of generality it is better to use a generic name ``DFT+$U$'' and then specify which functional is used.
In 1995 Liechtenstein and coworkers introduced a formulation of the Hubbard-corrected DFT that includes not only the Hubbard $U$ correction but also the Hund $J$ correction~\cite{Liechtenstein:1995}. Sometimes in is called in the literature as DFT+$U$+$J$. Within this formulation it is possible to set $J=0$ and thus obtain DFT+$U$.
In 1998 Dudarev and coworkers introduced the rotationally invariant (``simplified'') formulation of DFT+$U$~\cite{Dudarev:1998}. In this formulation, instead of having $U$ and $J$ individually we have just one effective parameter: $U_\mathrm{eff} = U - J$ (and often the subscript eff is dropped).
In 2011 Himmetoglu and coworkers introduced an extension of the Dudarev's DFT+$U$ to take into account $J$ in a simplified manner~\cite{Himmetoglu:2011}. In order to distinguish from $J$ in the Liechtenstein's DFT+$U$+$J$, here we use the name ``$J_0$''. This DFT+$U$+$J_0$ formulation is not yet a well-established method and it is an active field of research (see e.g. Refs.~\cite{Bajaj:2017, Linscott:2018}).
One year earlier, in 2010 Campto Jr and Cococcioni extended Dudarev's formulation of DFT+$U$ to include inter-site Hubbard $V$ interactions~\cite{Campo:2010}. This is known as the DFT+$U$+$V$ approach.
All the aforementioned methods are implemented in the official \qe\ \version.
\section{Why changing the old input?}
In \qe\ 7.0 and earlier, the input parameters for the \pw\ code were the following:
\begin{itemize}
\item \texttt{lda\_plus\_u}
\item \texttt{lda\_plus\_u\_kind}
\item \texttt{Hubbard\_U}
\item \texttt{Hubbard\_J}
\item \texttt{Hubbard\_J0}
\item \texttt{Hubbard\_V}
\item \texttt{U\_projection\_type}
\end{itemize}
Moreover, the Hubbard manifold and the initial atomic occupations were hard-coded in\\ \texttt{Modules/set\_hubbard\_l.f90} and \texttt{PW/src/tabd.f90}. The data in these routines was far from being complete. So the user had to modify these routines each time when there were missing chemical elements and recompile the code. Of course, this was not user friendly especially when \qe\ was already compiled on some clusters and the user had to ask system administrators to recompile the code to adapt it to user's needs.
In addition, the name \texttt{lda\_plus\_u} refers to the old name ``LDA+$U$'', which is mentioned in Sec.~\ref{sec:history}. So this was confusing if e.g. the user want to use GGA (so actually doing GGA+$U$ and not LDA+$U$). The name \texttt{U\_projection\_type} again refers to $U$, but what if we use also $J$ or $V$? So it makes sense to get rid of ``U'' in the naming and use a generic term ``Hubbard'' that covers all cases (DFT+$U$, DFT+$U$+$J$, DFT+$U$+$V$, etc.).
A subgroup of \qe\ developers came up with the idea to try and improve the input syntax in the DFT+Hubbard codes to make it more user-friendly. This new DFT+Hubbard input syntax replaces the old one starting from \qe\ \version.
\section{New DFT+Hubbard input}
In this section we present the new DFT+Hubbard input syntax that replaces the old one starting from \qe\ \version. Let us give examples for different flavors of DFT+Hubbard.
\subsection{DFT+$U$ (Dudarev's formulation)}
\textbf{Important notice:} The Hubbard $U$ values shown in the examples below are random values chosen just for the sake of demonstration purposes and they must not be used for production calculations.\\
\noindent
In the past, to use this case the user had to specify in the \pw\ input file e.g. the following:
%
\noindent
\begin{verbatim}
&system
...
lda_plus_u = .true.
lda_plus_u_kind = 0
U_projection_type = 'ortho-atomic'
Hubbard_U(1) = 5.0
Hubbard_U(2) = 6.0
/
\end{verbatim}
\noindent
Below is the example of the new input syntax of DFT+$U$ (Dudarev's formulation) for Ni2MnGa:
%
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{U Ni-3d 6.0}
\end{alltt}
%
Note that in the example above we do not specify any parameters related to DFT+$U$ in the \texttt{system} namelist (contrary to what was done in the past). All Hubbard-related parameters are now specified in the new card called ``HUBBARD''. The user has to specify the type of the Hubbard projectors that will be used in DFT+$U$. This is done by writing the type of projectors on the same line where the HUBBARD card name appears. In the past the type of Hubbard projectors was specified using the input keyword \texttt{U\_projection\_type}, which is no longer used. And now it is not needed to specify \texttt{lda\_plus\_u=.true.}
The possible options for Hubbard projectors are: \texttt{atomic}, \texttt{ortho-atomic}, \texttt{norm-atomic}, \texttt{wf}, and \texttt{pseudo}. There is no default for Hubbard projectors, i.e. the user must specify it. Please see \texttt{/Doc/INPUT\_PW.txt} for the description of these options. The most frequently used types of projectors are \texttt{atomic} and \texttt{ortho-atomic}. It is recommended use \texttt{ortho-atomic} whenever possible. The advantage of \texttt{ortho-atomic} over \texttt{atomic} is that the Hubbard corrections are applied only once in the former case, while in the latter case they are applied twice in the orbital overlap regions. So generally \texttt{ortho-atomic} Hubbard projectors give more accurate results (e.g. atomic occupations) that those obtained using the \texttt{atomic} Hubbard projectors. If you are interested to learn more about the Hubbard projectors you are invited to check e.g. Refs.~\cite{Wang:2016, Mahajan:2021}.
In the example above, we specified the Hubbard $U$ values of 5.0 and 6.0~eV for Mn-$3d$ and Ni-$3d$ states, respectively. Here, $3d$ are the Hubbard manifolds. Previously these manifolds were tabulated and hard-coded in the routines \texttt{Modules/set\_hubbard\_l.f90}. Now these manifolds must be specified in the HUBBARD card for each chemical element. The initial occupations of these manifolds were previously tabulated and hard-coded in \texttt{PW/src/tabd.f90}, but now the initial occupations are read from the pseudopotentials. If the user is not happy with this default behavior of the code, then it is possible to overwrite these initial occupations by specifying them in the input file in the \texttt{system} namelist using a new keyword \texttt{Hubbard\_occ(ityp,i)}, where \texttt{ityp} is the atomic type number (see \texttt{ATOMIC\_SPECIES}), and \texttt{i} runs from 1 to 3 (because there can be up to 3 Hubbard manifolds per one atomic type - see more below). The example is given below:
%
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5,
\textcolor{red}{Hubbard_occ(1,1) = 5.00}
\textcolor{red}{Hubbard_occ(2,1) = 8.00}
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{U Ni-3d 6.0}
\end{alltt}
%
Some pseudopotentials are generated in the ionized state (this is not the case here), and the occupations of e.g. $3d$ shell in these pseudopotentials can be different from what is expected in a neutral atom. In this case it might be useful to use the keyword \texttt{Hubbard\_occ(ityp,i)} as shown above. Note that in magnetic systems there are many local minima and the DFT+$U$ calculation can converge to different ground states depending on the initial occupation of the Hubbard manifold.
It is possible to specify 2 Hubbard channels/manifolds per atomic type, as shown in the example below:
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{U Mn-3p 3.0}
\textcolor{red}{U Ni-3d 6.0}
\textcolor{red}{U Ni-4s 2.0}
\end{alltt}
%
In this example we apply $U=5.0$~eV to Mn-$3d$ states and $U=3.0$~eV to Mn-$3p$ states, where $3d$ appears first in the list and hence this is the first Hubbard channel/manifold for Mn while $3p$ appears second and hence this is the second Hubbard channel/manifold for Mn. Similarly, we apply $U=6.0$~eV to Ni-$3d$ states and $U=2.0$~eV to Ni-$4s$ states. It is important to remark that when the user specifies the Hubbard manifolds he/she must make sure that these states are present in the pseudopotentials that are used.
Moreover, it is possible to specify even 3 Hubbard channels/manifolds per atomic type. However, in this case the 2nd and the 3rd Hubbard manifolds will be considered as one effective manifold, and the same Hubbard $U$ will be applied to this effective manifold. Please see the example below:
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{U Mn-3p-3s 3.0}
\textcolor{red}{U Ni-3d 6.0}
\textcolor{red}{U Ni-4s 2.0}
\end{alltt}
%
In this example we apply $U=5.0$~eV to Mn-$3d$ states (1st Hubbard manifold) and $U=3.0$~eV to Mn-$3p$ and Mn-$3s$ states (2nd and 3rd Hubbard manifolds that are considered as one effective manifold). For Ni it is the same as in the previous example.\\
\noindent
It is possible to specify the initial occupations of all Hubbard manifolds of each atomic type from the input. This will overwrite the occupations that are read by default from the pseudopotentials. The example is shown below:
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5,
\textcolor{red}{Hubbard_occ(1,1) = 5.00}
\textcolor{red}{Hubbard_occ(1,2) = 6.00}
\textcolor{red}{Hubbard_occ(1,3) = 2.00}
\textcolor{red}{Hubbard_occ(2,1) = 8.00}
\textcolor{red}{Hubbard_occ(2,2) = 2.00}
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{U Mn-3p-3s 3.0}
\textcolor{red}{U Ni-3d 6.0}
\textcolor{red}{U Ni-4s 2.0}
\end{alltt}
%
In this example, \texttt{Hubbard\_occ(1,1)} corresponds to the occupations of Mn-$3d$ states,\\ \texttt{Hubbard\_occ(1,2)} corresponds to the occupations of Mn-$3p$ states, and\\ \texttt{Hubbard\_occ(1,3)} corresponds to the occupations of Mn-$3s$ states. Similarly,\\ \texttt{Hubbard\_occ(2,1)} corresponds to the occupations of Ni-$3d$ states, and\\ \texttt{Hubbard\_occ(2,2)} corresponds to the occupations of Ni-$4s$ states.\\
\noindent
Below is the example showing how to perform DFT+$U$+$J_0$ calculation:
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{J0 Mn-3d 1.0}
\textcolor{red}{U Ni-3d 6.0}
\textcolor{red}{J0 Ni-3d 1.2}
\end{alltt}
%
In the example above we apply Hubbard $U=5.0$~eV and Hund $J_0 = 1.0$~eV to Mn-$3d$ states, and Hubbard $U=6.0$~eV and Hund $J_0 = 1.2$~eV to Ni-$3d$ states. In the past, \texttt{J0} was specifed using the parameter \texttt{Hubbard\_J0} in the \texttt{system} namelist. Note that \texttt{J0} currently can be used only for one Hubbard channel.\\
\noindent
The code reads all lines in the \texttt{HUBBARD} card until the end of file is reached or until the next card is found in the input.\\
\noindent
Finally, note that currently the Dudarev's DFT+$U$ is not implemented for the noncollinear spin-polarized case. However, Liechtenstein's DFT+$U$ supports the noncollinear spin-polarized case, and so if you use this case then the code will automatically switch to the Liechtenstein's DFT+$U$.
\subsection{DFT+$U$+$J$ (Liechtenstein's formulation)}
\textbf{Important notice:} The Hubbard $U$ and Hund $J$ values shown in the examples below are random values chosen just for the sake of demonstration purposes and they must not be used for production calculations.\\
\noindent
In the past, to use this case the user had to specify in the \pw\ input file e.g. the following:
%
\noindent
\begin{verbatim}
&system
...
lda_plus_u = .true.
lda_plus_u_kind = 1
U_projection_type = 'ortho-atomic'
Hubbard_U(1) = 5.0
Hubbard_J(1,1) = 1.0
Hubbard_J(2,1) = 1.1
Hubbard_U(2) = 6.0
Hubbard_J(1,2) = 1.2
Hubbard_J(2,2) = 1.3
/
\end{verbatim}
%
The meaning of \texttt{Hubbard\_J(i,ityp)} was the following (\texttt{i} runs from 1 to 3, and \texttt{ityp} is the atomic type):
\begin{itemize}
\item For $p$ orbitals: $J$ = \texttt{Hubbard\_J(1,ityp)};
\item For $d$ orbitals: $J$ = \texttt{Hubbard\_J(1,ityp)}, $B$ = \texttt{Hubbard\_J(2,ityp)};
\item For $f$ orbitals: $J$ = \texttt{Hubbard\_J(1,ityp)}, $E2$ = \texttt{Hubbard\_J(2,ityp)}, $E3$ = \texttt{Hubbard\_J(3,ityp)} ;
\end{itemize}
(If $B$ or $E2$ or $E3$ were not specified or set to 0 they were calculated from $J$ using atomic ratios.)\\
Where these name conventions come from? There are many possible choices how to parametrize Hubbard interactions: $i)$ Slater integrals $F^0$, $F^2$, $F^4$, ..., $ii)$ standard Racah parameters $A$, $B$, $C$, $D$, ..., $iii)$ another set of Racah parameters $E^0$, ..., $E^3$, $iv)$ more physical choice $U$ and $J$ plus other missing like $B$ for the $d$ shell or $E^2$ and $E^3$ for the $f$ shell. In \qe\ the latter notation is used. Check the following references for further reading~\cite{Liechtenstein:1995, Racah:1942, Racah:1942b, Racah:1943, Racah:1949, Griffith:1961}.\\
\noindent
Below is the example of the new input syntax of DFT+$U$+$J$ (Liechtenstein's formulation) for Ni2MnGa:
%
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='Ni2MnGa'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 7, celldm(1) = 7.80, celldm(3) = 1.4142136,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0, nspin = 2,
occupations ='smearing', smearing ='mv', degauss = 0.01,
starting_magnetization(1) = 0.5,
starting_magnetization(2) = 0.5
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Mn 54.938 Mn.pbesol-spn-rrkjus_psl.0.3.1.UPF
Ni 58.693 Ni.pbesol-n-rrkjus_psl.0.1.UPF
Ga 69.723 Ga.pbesol-dn-rrkjus_psl.0.2.UPF
ATOMIC_POSITIONS (crystal)
Mn 0.0000000000 0.0000000000 0.0000000000
Ni 0.5000000000 0.7500000000 0.2500000000
Ni 0.5000000000 0.2500000000 0.7500000000
Ga 0.0000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Mn-3d 5.0}
\textcolor{red}{J Mn-3d 1.0}
\textcolor{red}{B Mn-3d 1.1}
\textcolor{blue}{U Ni-3d 6.0}
\textcolor{blue}{J Ni-3d 1.2}
\textcolor{blue}{B Ni-3d 1.3}
\end{alltt}
%
\noindent
If you want to use DFT+$U$ in the Liechtenstein's formulation (without $J$) then you still need to specify some very small value of $J$ (e.g. \texttt{1.d-12}) so that the automatic algorithm decides that this is the Liechtenstein's formulation. If \texttt{J} is not present in the \texttt{HUBBARD} card then the code will automatically assume that this is DFT+$U$ in the Dudarev's formulation.
\subsection{DFT+$U$+$V$ (Dudarev's formulation)}
\textbf{Important notice:} The Hubbard $U$ and $V$ values shown in the examples below are random values chosen just for the sake of demonstration purposes and they must not be used for production calculations.\\
\noindent
In the past, to use this case the user had to specify in the \pw\ input file e.g. the following:
%
\noindent
\begin{verbatim}
&system
...
lda_plus_u = .true.
lda_plus_u_kind = 2
U_projection_type = 'ortho-atomic'
Hubbard_V(1,1,1) = 7.70
Hubbard_V(1,19,1) = 0.75
Hubbard_V(1,46,1) = 0.75
Hubbard_V(1,43,1) = 0.75
Hubbard_V(1,54,1) = 0.75
Hubbard_V(1,11,1) = 0.75
Hubbard_V(1,22,1) = 0.75
/
\end{verbatim}
%
The meaning of \texttt{Hubbard\_V(na,nb,k)}, where \texttt{na} and \texttt{nb} label atoms as they are specified in the \texttt{ATOMIC\_POSITIONS} card (not in the \texttt{ATOMIC\_SPECIES} card!), and \texttt{k} controls the ``interaction type''. When \texttt{na}=\texttt{nb}, \texttt{Hubbard\_V(na,na,k)} corresponds to \texttt{Hubbard\_U(ityp(na))}, where \texttt{ityp(na)} is the atomic type of atom \texttt{na}. The index \texttt{k} could take the following values:
\begin{itemize}
\item \texttt{k}=1: interaction between standard orbitals (both on na and nb);
\item \texttt{k}=2: interaction between standard (on na) and background (on nb) orbitals;
\item \texttt{k}=3: interaction between background orbitals (both on na and nb);
\item \texttt{k}=4: interaction between background (on na) and standard (on nb) orbitals.
\end{itemize}
Standard orbitals correspond to the main Hubbard channel (e.g. d electrons in transition metals) and background orbitals correspond to the secondary Hubbard channel (e.g. p electrons in transition metals).\\
\noindent
The second index of \texttt{Hubbard\_V(na,nb,k)} (i.e. the index \texttt{nb}) corresponds to atoms that are neighbors to atom \texttt{na}. You can notice that \texttt{nb} can take quite large values (even larger than the total number of atoms in the simulation cell). This is so because we are using periodic boundary conditions and hence some neighbors fall outside of our simulation cell. For this reason, the code generates virtual cells around our real cells. This way we can find all neighbors. In practice, this is achieved by constructing a virtual $3 \times 3 \times 3$ supercell and by replicating atoms. This is why the indices of neighboring atoms are so strange. If you are interested how these indices are generated, please check the subroutine \texttt{PW/src/intersiteV.f90}. {\it A priori}, it is not obvious how to find the indices of neighbors. For this reason you can use the \hp\ code of \qe\ that will determine the values of $U$ and $V$ and the indices of couples.\\
\noindent
In the new input, the same logic holds but the input syntax has changed. Below is the example of the new input syntax of DFT+$U$+$V$ (Dudarev's formulation) for LiCoO$_2$:
%
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='LiCoO2'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 5, celldm(1) = 9.3705, celldm(4) = 0.83874,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Co 59.0 Co.pbesol-spn-rrkjus_psl.0.3.1.UPF
O 16.0 O.pbesol-n-rrkjus_psl.0.1.UPF
Li 7.0 Li.pbesol-s-rrkjus_psl.0.2.1.UPF
ATOMIC_POSITIONS (crystal)
Co 0.0000000000 0.0000000000 0.0000000000
O 0.2604885000 0.2604885000 0.2604885000
O 0.7395115000 0.7395115000 0.7395115000
Li 0.5000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
\textcolor{red}{HUBBARD (ortho-atomic)}
\textcolor{red}{U Co-3d 7.70}
\textcolor{red}{V Co-3d O-2p 1 19 0.75}
\textcolor{red}{V Co-3d O-2p 1 46 0.75}
\textcolor{red}{V Co-3d O-2p 1 43 0.75}
\textcolor{red}{V Co-3d O-2p 1 54 0.75}
\textcolor{red}{V Co-3d O-2p 1 11 0.75}
\textcolor{red}{V Co-3d O-2p 1 22 0.75}
\end{alltt}
%
In this case, the code will detect \texttt{U} and \texttt{V} parameters in the \texttt{HUBBARD} card, and so the code will consider this as being a DFT+$U$+$V$ calculation. The first line in the \texttt{HUBBARD} card corresponds to the on-site Hubbard $U$ parameter that is used for Co-$3d$ states, and the value of this parameter is 7.70~eV. Note that the equivalent allowed syntax for this first line is the following:
%
\noindent
\begin{alltt}
\textcolor{red}{V Co-3d Co-3d 1 1 7.70}
\end{alltt}
%
All the other lines in the \texttt{HUBBARD} card above correspond to the inter-site Hubbard $V$ parameters between Co-$3d$ and O-$2p$ states. Why do we have 6 of them? Because in LiCoO$_2$ each Co atom has 6 nearest neighbors (octahedral coordination geometry for Co atoms). In this example, all 6 O atoms are at the same distance from Co, so the value of $V$ parameters are all equal to 0.75~eV. but in general, there might be complex distortion of the structure, and hence there might be different Co-O distances and hence the values of $V$ parameters will be somewhat different. The indices that appear in the 4th and 5th columns of the \texttt{V} entries correspond to the \texttt{na} and \texttt{nb} indices of the arrays \texttt{Hubbard\_V(na,nb,k)} that are still used internally in the \pw\ code. If we have just one occurrence of \texttt{V} for a given couple of indices \texttt{na} and \texttt{nb}, then this will be attributed to \texttt{k=1}, i.e. the so-called ``standard-standard'' interaction. In this example, the ``standard-standard'' interaction means that we take into account the interaction between Co-$3d$ and O-$2p$ states.\\
\noindent
Below we give a more advanced example that shows how to take into account also other types of inter-site interactions.
%
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='LiCoO2'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 5, celldm(1) = 9.3705, celldm(4) = 0.83874,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Co 59.0 Co.pbesol-spn-rrkjus_psl.0.3.1.UPF
O 16.0 O.pbesol-n-rrkjus_psl.0.1.UPF
Li 7.0 Li.pbesol-s-rrkjus_psl.0.2.1.UPF
ATOMIC_POSITIONS (crystal)
Co 0.0000000000 0.0000000000 0.0000000000
O 0.2604885000 0.2604885000 0.2604885000
O 0.7395115000 0.7395115000 0.7395115000
Li 0.5000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
HUBBARD (ortho-atomic)
\textcolor{red}{V Co-3d Co-3d 1 1 7.70}
\textcolor{red}{V Co-3d Co-3p 1 1 1.00}
\textcolor{red}{V Co-3p Co-3p 1 1 2.00}
\textcolor{red}{V Co-3p Co-3d 1 1 1.00}
\textcolor{blue}{V Co-3d O-2p 1 19 0.75}
\textcolor{blue}{V Co-3d O-2s 1 19 0.60}
\textcolor{blue}{V Co-3p O-2s 1 19 0.50}
\textcolor{blue}{V Co-3p O-2p 1 19 0.60}
...
\end{alltt}
%
In this example, we have specified 4 types of interactions per couple. Note that in this example we replaced \texttt{U} for Co-$3d$ states using \texttt{V}, as was discussed above (``standard-standard'', i.e. \texttt{k=1}). In red and blue we highlight two groups of couples. In red we show the first group that describes 4 types of interactions for the Co atoms. The first line in the red block corresponds to the on-site $U$ value for Co-$3d$ states. The second line in the red block corresponds to the on-site interaction between Co-$3d$ and Co-$3p$ states (``standard-background'', i.e. \texttt{k=2}), the third line in the red block corresponds to the on-site interaction between Co-$3p$ and Co-$3p$ states (``background-background'', i.e. \texttt{k=3}), and the fourth line in the red block corresponds to the on-site interaction between Co-$3p$ and Co-$3d$ states (``background-standard'', i.e. \texttt{k=4}). Note that second and the fourth lines in the red block describe the same thing, so it is ok to drop the fourth line. {\bf Important notice:} It is obligatory to keep the order of entries as shown in the example above: 1) standard-standard, 2) standard-background, 3) background-background, 4) background-standard. If you do not respect this order then the code will complain and stop. The second block above (shown in blue) has the same logic as the one we presented above for the red block. The only difference is that in the blue block we describe various types of interactions centered on different atoms (thus inter-site, not on-site).\\
\noindent
To make things even more complicated, it is possible to specify two Hubbard manifolds in the ``background'' channel. The example is shown below.
%
\noindent
\begin{alltt}
&control
calculation='scf'
restart_mode='from_scratch',
prefix='LiCoO2'
pseudo_dir = '../pseudo'
outdir='./tmp'
/
&system
ibrav = 5, celldm(1) = 9.3705, celldm(4) = 0.83874,
nat = 4, ntyp = 3, ecutwfc = 50.0, ecutrho = 400.0
/
&electrons
conv_thr = 1.d-10
mixing_beta = 0.7
/
ATOMIC_SPECIES
Co 59.0 Co.pbesol-spn-rrkjus_psl.0.3.1.UPF
O 16.0 O.pbesol-n-rrkjus_psl.0.1.UPF
Li 7.0 Li.pbesol-s-rrkjus_psl.0.2.1.UPF
ATOMIC_POSITIONS (crystal)
Co 0.0000000000 0.0000000000 0.0000000000
O 0.2604885000 0.2604885000 0.2604885000
O 0.7395115000 0.7395115000 0.7395115000
Li 0.5000000000 0.5000000000 0.5000000000
K_POINTS (automatic)
4 4 4 0 0 0
HUBBARD (ortho-atomic)
\textcolor{red}{V Co-3d Co-3d 1 1 7.70}
\textcolor{red}{V Co-3d Co-3p-3s 1 1 1.00}
\textcolor{red}{V Co-3p-3s Co-3p-3s 1 1 2.00}
\textcolor{red}{V Co-3p-3s Co-3d 1 1 1.00}
\textcolor{blue}{V Co-3d O-2p 1 19 0.75}
\textcolor{blue}{V Co-3d O-2s-1s 1 19 0.60}
\textcolor{blue}{V Co-3p-3s O-2s-1s 1 19 0.50}
\textcolor{blue}{V Co-3p-3s O-2p 1 19 0.60}
...
\end{alltt}
%
Note that pseudopotentials for O do not include $1s$ states. So the example above is just to show that if you have other elements instead of O and there are more deeper-lying states you may include them in the ``background'' channel. As before, it is possible to control the initial occupations of all these Hubbard manifolds using the keyword \texttt{Hubbard\_occ} (i.e. if you are not happy with the initial occupations that are read from pseudopotentials).\\
\noindent
Hubbard parameters $U$ and $V$ can be computed using the \hp\ code of \qe. However, the \hp\ currently supports the calculations of $U$ and $V$ for one Hubbard channel per atomic type. In other words, the advanced features presented above (i.e. cross-manifold interactions) are currently not implemented in \hp.
\section{Calculation of Hubbard parameters}
In Hubbard-corrected DFT the values of Hubbard parameters are not known {\it a priori}. It is a common practice in literature to evaluate them semi-empirically by fitting various experimental properties (when available), which prevents this method from being fully {\it ab initio} and from being predictive for novel materials. Most importantly, it is often forgotten that $U$ acts on a Hubbard manifold that is defined in terms of different Hubbard projector functions such of e.g. atomic orbitals (\texttt{atomic}) or L\"owdin orthogonalized atomic orbitals (\texttt{ortho-atomic}). These are typically taken from the atomic calculations used to generate the respective pseudopotentials, that can be constructed with different degrees of oxidation. Hence, these manifolds, and the relative $U$ parameters, are not transferable and one should not consider $U$ as a universal number for a given element or material (see the appendix in Ref.~\cite{Kulik:2008}). There are other types of Hubbard projector functions (e.g. truncated atomic orbitals (\texttt{Abinit}), PAW projectors (VASP), etc.), and the value of $U$ depends on which type of projector functions are used. Therefore, in general it is not correct to take $U$ from the literature and use it for your DFT+$U$ calculations without paying attention to what pseudopotentials were used, which Hubbard projector functions, etc.
During the past 30 years there has been a large effort to develop methods for the first-principles calculation of $U$. Among these, the constrained DFT (cDFT) approach, the Hartree-Fock-based approaches, and the constrained random phase approximation (cRPA) approach are the most popular. A linear-response formulation of cDFT (LR-cDFT) was introduced in Ref.~\cite{Cococcioni:2005} and generalized to the calculation of the inter-site Hubbard parameters $V$ in Ref.~\cite{Campo:2010}. Calculation of $U$ and $V$ using LR-cDFT can be done using the \pw\ code (see \texttt{Hubbard\_alpha} in the \pw\ documentation). However, this method requires using supercells which makes LR-cDFT computationally expensive. Moreover, the postprocessing of the data requires writing some small programs and/or scripts. Recently, LR-cDFT has been recast via density-functional perturbation theory (DFPT)~\cite{Timrov:2018, Timrov:2021}, allowing us to overcome several challenges of the supercell approach of Ref.~\cite{Cococcioni:2005}. In fact, by constructing the response of the system to a localized perturbation through a series of independent monochromatic perturbations to the primitive unit cell (rather than from finite-differences between calculations in supercells as in LR-cDFT), it improves significantly the computational efficiency, accuracy, user-friendliness, and automation. Key to this is indeed the capability to express perturbation theory in reciprocal space~\cite{Baroni:1987, Giannozzi:1991, Baroni:2001}. It is important to mention that the present formulation (be it in a LR-cDFT or DFPT implementation) aims to correct the over-delocalization and over-hybridization of the electrons in the localized Hubbard manifold; for this reason it is not appropriate to deal with closed-shell systems, where the electrons are fully contained in the localized manifold~\cite{Yu:2014}.
The DFPT method for computing Hubbard parameters is implemented in the \hp\ code which is part of the \qe\ distribution. Check the examples in the \texttt{HP} directory to get started. If you have any questions or problems, please read carefully the posting guidelines~\cite{QE} and ask your questions on the QE users forum (users@lists.quantum-espresso.org).
\begin{thebibliography}{100}
\bibitem{Anisimov:1991} V.I. Anisimov, J. Zaanen, and O.K. Andersen, \textit{Band theory and Mott insulators: Hubbard $U$ instead of Stoner $I$}, Phys. Rev. B. {\bf 44}, 943 (1991).
\bibitem{Liechtenstein:1995} A.I. Liechtenstein, V.I. Anisimov, J. Zaanen, \textit{Density-functional theory and strong interactions: Orbital ordering in Mott-Hubbard insulators}, Phys. Rev. B {\bf 52}, R5467 (1995).
\bibitem{Dudarev:1998} S.L. Dudarev, G.A. Botton, S.Y. Savrasov, C.J. Humphreys, A.P. Sutton, \textit{Electron-energy-loss spectra and the structural stability of nickel oxide: An LSDA+U study}, Phys. Rev. B {\bf 57}, 1505 (1998).
\bibitem{Himmetoglu:2011} B. Himmetoglu, R.M. Wentzcovitch, M. Cococcioni, \textit{First-principles study of electronic and structural properties of CuO}, Phys. Rev. B {\bf 84}, 115108 (2011).
\bibitem{Bajaj:2017} A. Bajaj, J.P. Janet, and H.J. Kulik, \textit{Communication: Recovering the flat-plane condition in electronic structure theory at semi-local DFT cost}, J. Chem. Phys. {\bf 147}, 191101 (2017).
\bibitem{Linscott:2018} E.B. Linscott, D.J. Cole, M.C. Payne, and D.D. O'Regan, \textit{Role of spin in the calculation of Hubbard $U$ and Hund's $J$ parameters from first principles}, Phys. Rev. B {\bf 98}, 235157 (2018).
\bibitem{Racah:1942} G. Racah, \textit{Theory of Complex Spectra. I}, Phys. Rev. {\bf 61}, 186 (1942).
\bibitem{Racah:1942b} G. Racah, \textit{Theory of Complex Spectra. II}, Phys. Rev. {\bf 62}, 438 (1942).
\bibitem{Racah:1943} G. Racah, \textit{Theory of Complex Spectra. III}, Phys. Rev. {\bf 63}, 367 (1943).
\bibitem{Racah:1949} G. Racah, \textit{Theory of Complex Spectra. IV}, Phys. Rev. {\bf 76}, 1352 (1949).
\bibitem{Griffith:1961} J. Griffith, \textit{Book ``The Theory of Transition Metal Ions''}, Cambridge University Press (1961).
\bibitem{Campo:2010} V.L. Campo Jr and M. Cococcioni, \textit{Extended DFT+$U$+$V$ method with on-site and inter-site electronic interactions}, J. Phys.: Condens. Matter {\bf 22}, 055602 (2010).
\bibitem{Wang:2016} Y.-C. Wang, Z.-H. Chen, and H. Jiang, \textit{The local projection in the density functional theory plus $U$ approach: A critical assessment}, J. Chem. Phys. {\bf 144}, 144106 (2016).
\bibitem{Mahajan:2021} R. Mahajan, I. Timrov, N. Marzari, and A. Kashyap, \textit{Importance of intersite Hubbard interactions in $\beta$-MnO$_2$: A first-principles DFT+$U$+$V$ study}, Phys. Rev. Materials {\bf 5}, 104402 (2021).
\bibitem{Kulik:2008} H.J. Kulik and N. Marzari, \textit{A self-consistent Hubbard $U$ density-functional theory approach to the addition-elemination reactions of hydrocarbons on bare FeO$^+$}, J. Chem. Phys. {\bf 129}, 134314 (2008).
\bibitem{Cococcioni:2005} M. Cococcioni and S. de Gironcoli, \texttt{Linear response approach to the calculation of the effective interaction parameters in the LDA+U method}, Phys. Rev. B {\bf 71}, 035105 (2005).
\bibitem{Timrov:2018} I. Timrov, N. Marzari, and M. Cococcioni, \textit{Hubbard parameters from density-functional perturbation theory}, Phys. Rev. B {\bf 98}, 085127 (2018).
\bibitem{Timrov:2021} I. Timrov, N. Marzari, and M. Cococcioni, \textit{Self-consistent Hubbard parameters from density-functional perturbation theory in the ultrasoft and projector-augmented wave formulations}, Phys. Rev. B {\bf 103}, 045141 (2021).
\bibitem{Baroni:1987} S. Baroni, P. Giannozzi, and A. Testa \textit{Green's-Function Approach to Linear Response in Solids}, Phys. Rev. Lett. {\bf 58}, 1861 (1987).
\bibitem{Giannozzi:1991} P. Giannozzi, S. de Gironcoli, P. Pavone, and S. Baroni, \textit{Ab initio calculation of phonon dispersions in semiconductors}, Phys. Rev. B {\bf 43}, 7231 (1991).
\bibitem{Baroni:2001} S. Baroni, S. de Gironcoli, A. Dal Corso, and P. Giannozzi, \textit{Phonons and related crystal properties from density-functional perturbation theory}, Rev. Mod. Phys. {\bf 73}, 515 (2001).
\bibitem{Yu:2014} K. Yu and E.A. Carter, \textit{Communication: Comparing ab initio methods of obtaining effective U parameters for closed-shell materials}, J. Chem. Phys. {\bf 140}, 121105 (2014).
\bibitem{QE} https://www.quantum-espresso.org/users-forum/
\end{thebibliography}
\end{document}

View File

@ -1,7 +1,7 @@
LATEX = pdflatex
LATEX2HTML = latex2html
PDFS = constraints_HOWTO.pdf user_guide.pdf brillouin_zones.pdf
PDFS = constraints_HOWTO.pdf user_guide.pdf brillouin_zones.pdf Hubbard_input.pdf
AUXS = $(PDFS:.pdf=.aux)
LOGS = $(PDFS:.pdf=.log)
OUTS = $(PDFS:.pdf=.out)

View File

@ -1,5 +1,5 @@
\documentclass[12pt,a4paper]{article}
\def\version{6.7}
\def\version{7.1}
\def\qe{{\sc Quantum ESPRESSO}}
\textwidth = 17cm
\textheight = 24cm

View File

@ -1,4 +1,65 @@
New in development version:
* GPU-accelerated phonon code (CINECA Team)
Incompatible changes in development version:
* CUDA Fortran Davidson code removed and replaced by ACC (Ivan Carnimeo)
Fixed in development version:
* neb.x was unable to read the HUBBARD card from input since v7.1
* r2r4 coefficient for Ba in DFT-D3 was incorrect since the original
release in v.6.2.1 (noticed by Valid Askarpour)
* Fully-relativistic PPs generated with QE versions 6.7 to 7.1 may contain
an incorrect "PP_AEWFC_rel" tag instead of the correct one "PP_AEWFC_REL"
(noticed by Andrea Dal Corso)
* Non-magnetic spin-orbit calculations could in some rare case produce
NaN's in the energy (noticed by Andrea Dal Corso)
* CP wasn't honoring "exx_fraction" in v.7.1 (fixed by Hsin-Yu Ko)
* QEHeat and KCW were not working if compiled for GPU
New in 7.1 version:
* DFT+RISM (1D, 3D, Laue) by S. Nishihara & M. Otani [Phys. Rev. B 96, 115429 (2017)].
* Improved, streamlined and extended porting to NVidia GPUs
* KCW package for Koopmans-compliant functionals in a Wannier representation:
https://journals.aps.org/prx/abstract/10.1103/PhysRevX.8.021051.
Developed and maintained by N. Colonna, R. de Gennaro, E. Linscott
* If no explicit parallelization options -nk, -nt, -nd are provided,
pw.x will figure out suitable values for optimal, or at least, not
too bad, parallelization
* EPW v.5.5. For the full list of new features, bug fixes, and changes leading
to backward incompatibility issues, please visit the Releases page of the
EPW documentation site [https://docs.epw-code.org/doc/Releases.html].
Incompatible changes in 7.1 version:
* Source files previously found in FFTXLIB/ moved to FFTXLIB/src/
* gen_us_dj, gen_us_dj moved to upflib/ and into module uspp_init
* DFT+Hubbard codes have new input syntax (check Doc/Hubbard_input.pdf)
Fixed in 7.1 version:
* Berry phases could in some cases be off by a factor eR/V.
Present since v.6.6 (noticed by S. Spreafico, fixed by R. Cohen)
* `divide_class_so` routine could yield the wrong irreducible representations
classification in the D3_h group (G.J. Ferreira, A. Dal Corso)
* XSPECTRA gave incorrect results with k-point parallelization, since
at least v. 6.6, due to missing broadcast of recomputed Fermi energy
(found and fixed by Fanchan Meng, Brookhaven)
* Bugfix in DFPT+U with PAW pseudos and when fildvscf/=''
* Makov-Payne correction wasn't working with ibrav=0 and cubic cell (A. Fonari)
* Card ADDITIONAL_KPOINTS wasn't working as expected (Prasenjit Ghosh)
* Subtle bug in G-vector ordering, usually triggered by almost-but-not-quite
symmetric primitive lattice vectors, was affecting k=0 calculations (not CP)
since v.6.0. The ultimate fix would require to change routine hpsort_eps;
the current workaround consists in not recomputing k+G indices if k=0.
* In CMake GPU builds, one routine fftx_threed2oned_gpu was not compiled with
a proper GPU compiler option and caused failure in GGA-noncolin calculations.
* CP with Hubbard U was crashing also when reading the xml file (v.7.0)
* Bugfix in turboMagnon for k point grids that have Gamma or points at the
Brillouin zone edge (the weights were not correct)
* Libxc (5.2.x) linking failure when using CMake.
New in 7.0 version:
* Unscreened and screened (range-separated) vdW-DF hybrids
(Per Hyldgaard and collaborators)
* GPU support for PWscf and CP significantly extended
* RMM-DIIS for CPU (S. Nisihara) and GPU (E. de Paoli, P. Delugas)
* DFT-D3: MPI parallelization and GPU acceleration with OpenACC
* projwfc.x can be used to compute the PDOS in a local basis (I. Timrov)
@ -11,7 +72,7 @@ New in development version:
For the full list of new features, bug fixes, and changes leading to backward incompatibility issues,
please visit the Releases page of the EPW documentation site [https://docs.epw-code.org/doc/Releases.html].
Fixed in development version:
Fixed in 7.0 version:
* Possible out-of-bound error (gfortran only) could crash DFT+U
* incorrect exx factor multiplication in the gga term of polarized cx0
functional (v.6.8 only)
@ -28,10 +89,16 @@ Fixed in development version:
* CP with DFT+U could crash when writing the xml file (since v.6.6)
* Restart of electron-phonon calculations was not working due to a missing
tag (thanks to Miguel Marques for reporting this)
* atomic: the exchange of two lines in import_upf.f90 had broken PAW tests
since v.6.5 (thanks to Chiara Biz for reporting, to AdC for fixing)
* Calculation of ELF for spin-unpolarized cases was grossly wrong in v.6.8
* Changes to the i-Pi interface to adapt it to usage with ASE had broken
the original functionality
Incompatible changes in develoment version:
Incompatible changes in 7.0 version:
* Changes to Makefiles and to file "make.inc" (they must be regenerated)
* clib/ deleted, files clib/*.c moved to UtilXlib/ or to Modules/
* Some reshuffling of variables for noncollinear and spin-orbit calculations
Known problems in 6.8 version:
* electron-phonon calculation in the non-colinear/spinorbit case is broken

View File

@ -1,5 +1,5 @@
\documentclass[12pt,a4paper]{article}
\def\version{7.0}
\def\version{7.1}
\def\qe{{\sc Quantum ESPRESSO}}
\usepackage{html}
@ -83,6 +83,8 @@ more specialized calculations:
\item \texttt{HP}: calculation of Hubbard $U$ parameters using DFPT;
\item \texttt{QEHeat}: energy current in insulators for thermal
transport calculations.
\item \texttt{KCW}: quasiparticle energies of finite and extended systems
using Koopmans-compliant functionals in a Wannier representation.
\end{itemize}
The following auxiliary packages are included as well:
\begin{itemize}
@ -174,6 +176,8 @@ work of Pietro Bonf\`a (Univ. Parma).
Contributors to \qe, beyond the authors of the papers
mentioned in Sec.\ref{SubSec:Terms}, include:
\begin{itemize}
\item Victor Yu (Urbana-Champaign) for various bug fixes and
optimizations;
\item Alexandre Tkatchenko's group, in particular Szabolcs Goger
(U. Luxembourg), and Robert DiStasio's group, in particular
Hsin-Yu Ko (Cornell), for Many-Body Dispersion (MBD) correction;
@ -231,8 +235,11 @@ and let us apologize to everybody we have forgotten.
\subsection{Contacts}
\label{SubSec:Contacts}
The main entry point for \qe\ users is the web site
\texttt{http://www.quantum-espresso.org/}. There you find
The main entry point for \qe\ users is the web site:
\begin{quote}
\texttt{http://www.quantum-espresso.org/}.
\end{quote}
There you find
the stable releases for download, general information and
documentation.
@ -363,21 +370,22 @@ The \qe\ distribution contains several directories. Some of them are
common to all packages:
\begin{tabular}{ll}
\texttt{Modules/} & Fortran modules and utilities used by all programs\\
\texttt{upflib/} & pseudopotential-related code, plus conversion tools\\
\texttt{include/} & files *.h included by fortran and C source files\\
\texttt{clib/} & libraries and utilities written in C\\
\texttt{FFTXlib/} & FFT libraries\\
\texttt{LAXlib/} & Linear Algebra (parallel) libraries\\
\texttt{KS\_Solvers/} & Iterative diagonalization routines\\
\texttt{UtilXlib/}& Miscellaneous timing, error handling, MPI utilites\\
\texttt{XClib/} & Exchange-correlation functionals (excepted van der Waals)\\
\texttt{install/} & installation scripts and utilities\\
\texttt{pseudo}/ & pseudopotential files used by examples\\
\texttt{Doc/} & general documentation\\
\texttt{archive/} & external libraries in .tar.gz form\\
\texttt{external/}& external libraries downloaded by CMake\\
\texttt{test-suite/} & automated tests\\
\texttt{Modules/} & Fortran modules and utilities used by all programs\\
\texttt{upflib/} & pseudopotential-related code, plus conversion tools\\
\texttt{include/} & files *.h included by fortran and C source files\\
\texttt{FFTXlib/} & FFT libraries\\
\texttt{LAXlib/} & Linear Algebra (parallel) libraries\\
\texttt{KS\_Solvers/}& Iterative diagonalization routines\\
\texttt{UtilXlib/} & Miscellaneous timing, error handling, MPI utilites\\
\texttt{XClib/} & Exchange-correlation functionals (excepted van der Waals)\\
\texttt{MBD/} & Routines for many-body dispersions\\
\texttt{LR\_Modules/}& Fortran modules and utilities used by linear-response codes\\
\texttt{install/} & installation scripts and utilities\\
\texttt{pseudo}/ & pseudopotential files used by examples\\
\texttt{Doc/} & general documentation\\
\texttt{archive/} & external libraries in .tar.gz form\\
\texttt{external/} & external libraries downloaded by CMake\\
\texttt{test-suite/} & automated tests
\end{tabular}
\\
while others are specific to a single package:
@ -392,14 +400,14 @@ while others are specific to a single package:
\texttt{CPV/} & \CP\ package\\
\texttt{atomic/} & \texttt{atomic} package\\
\texttt{GUI/} & \texttt{PWGui} package\\
\texttt{QEHeat/} & \texttt{QEHeat} package\\
\texttt{HP/} & \texttt{HP} package
\texttt{HP/} & \texttt{HP} package\\
\texttt{QEHeat/} & \texttt{QEHeat} package\\
\texttt{KCW/} & \texttt{KCW} package
\end{tabular}
\\
Finally, directory \texttt{COUPLE/} contains code and documentation
that is useful to call \qe\ programs from external codes; directory
\texttt{LR\_Modules/} contains source files for modules that are common
to all linear-response codes.
that is useful to call \qe\ programs from external codes.
\subsection{Prerequisites}
\label{Sec:Installation}
@ -428,12 +436,11 @@ formerly PGI compiler, freely available for download.
As a rule, \qe\ tries to keep compatibility with older compilers,
avoiding nonstandard extensions and newer features that are not
widespread or stabilized. If however your compiler is older say
than $\sim 5$ years or so it is quite likely that something will
not work. The same applies to mathematical and MPI libraries.
For GPU compilation, get the most recent NVidia HPC SDK you can:
while compilers from v. 17.4 on should work, several problems and
limitations are known to exist for old compiler versions.
widespread or stabilized. If however your compiler is older than
a few ($\sim 5$) years, it is likely that something will not work.
The same applies to mathematical and MPI libraries.
For GPU compilation, you need v.19.10 or later of the NVidia HPC SDK
(previous versions are no longer supported).
Big computing centers typically provide a Fortran compiler complete
with all needed libraries. Workstations or ``commodity'' machines
@ -444,7 +451,7 @@ software). Note that most commercial compilers are also available
free of charge under some conditions (e.g. academic or personal usage, no
support) and may provide MPI libraries and run-time software as well.
\subsection{Building with cMake}
\subsection{Building with CMake}
See \texttt{https://gitlab.com/QEF/q-e/-/wikis/Developers/CMake-build-system}.
@ -484,7 +491,8 @@ path, as specified in the PATH environment variable.
\texttt{include/qe\_cdefs.h} & (previously: \texttt{include/c\_defs.h})
a few definitions used by C files\\
\texttt{include/configure.h} & optional: info on compilation flags
(to enable its usage, \\ & uncomment \verb|#define __HAVE_CONFIG_INFO|
(to enable it, uncomment \\ &
\verb|#define __HAVE_CONFIG_INFO|
in Modules/environment.f90)\\
\end{tabular}\\
In addition, \configure\ generates (since v.7) files \texttt{make.depend},
@ -495,10 +503,11 @@ in any \texttt{Makefile}, type \texttt{make depend}, or run
It is convenient to use "parallel make" to speed up compilation:
\texttt{make -jN} compiles in parallel on N processors. Note that
if you interrupt \make, depending upon what it was doing
you may run into trouble the next time you type \make (for instance,
if \make\ is interrupted while unpacking and compiling the FoX library).
If so, run \texttt{make clean} before running \make\ again.
if you interrupt \make, you may run into trouble the next time you
type \make (for instance, if \make\ is interrupted while unpacking
and compiling an external library).
If so, run \texttt{make clean}, or even \texttt{make distclean},
before running \make\ again.
You should always be able to compile the \qe\ suite
of programs without having to edit any of the generated files. However you
@ -521,7 +530,7 @@ Some environment variables that are relevant to \configure\ are:
\begin{tabular}{ll}
\texttt{ARCH}& label identifying the machine type (see below)\\
\texttt{F90, F77, CC} &names of Fortran, Fortran-77, and C compilers\\
\texttt{F90, CC} &names of Fortran and C compilers\\
\texttt{MPIF90} & name of parallel Fortran 90 compiler (using MPI)\\
\texttt{CPP} & source file preprocessor (defaults to \$CC -E)\\
\texttt{LD} & linker (defaults to \$MPIF90)\\
@ -549,8 +558,8 @@ compilation.
If your machine type is unknown to \configure, you may use the
\texttt{ARCH}
variable to suggest an architecture among supported ones. Some large
parallel machines using a front-end (e.g. Cray XT) will actually
variable to suggest an architecture among supported ones. Some
parallel machines using a front-end may actually
need it, or else \configure\ will correctly recognize the front-end
but not the specialized compilation environment of those machines.
In some cases, cross-compilation requires to specify the target machine
@ -561,7 +570,7 @@ for NEC SX6 on a PC). Currently supported architectures are:
\begin{tabular}{ll}
\texttt{x86\_64} & Intel and AMD 64-bit running Linux\\
\texttt{arm} & ARM machines (with gfortran or armflang)\\
\texttt{crayxt4} & Cray XT4/XT5/XE machines\\
\texttt{craype} & Cray machines using Cray PE\\
\texttt{mac686} & Apple Intel machines running Mac OS X\\
\texttt{mingw32} & Cross-compilation for MS-Windows, using mingw, 32 bits\\
\texttt{mingw64} & As above, 64 bits\\
@ -597,7 +606,8 @@ is between bracket:\\
\\
and the following optional packages:\\
\begin{tabular}{ll}
\texttt{--with-scalapack}& (yes$|$no$|$intel) Use scalapack if available. \\
\texttt{--with-fox} & Use official FoX library instead of built-in replacement (default:no)\\
\texttt{--with-scalapack}& (yes$|$no$|$intel) Use scalapack if available. \\
&Set to \texttt{intel} to use Intel MPI and BLACS (default: use OpenMPI)\\
\texttt{--with-elpa-include}& Specify full path of ELPA include and modules
headers (no)\\
@ -621,10 +631,11 @@ and the following optional packages:\\
\end{tabular}\\
\\
In order to compile the code for GPU's you will need a recent version
-- the more recent, the better -- of the NVidia HPC software development
kit (SDK). OpenMP must be enabled, and you may want to use a CUDA-aware MPI
distribution if running on multiple GPUs in order to optimize the
interprocess data transfer. The following \configure\ options are
(v.19.10 or later: the more recent, the better) of the NVidia HPC software
development kit (SDK). OpenMP should be enabled. Enabling faster communications
between GPUs, via NVlink or Infiniband RDMA, is essential for optimal
performance. If your MPI library is built to be CUDA-aware, then enable it
with \texttt{--with-cuda-mpi=yes}. The following \configure\ options are
available:\\
\begin{tabular}{ll}
\texttt{--with-cuda=value}& enable compilation of GPU-accelerated subroutines.\\
@ -640,14 +651,11 @@ available:\\
& \texttt{value} must be consistent with the\\
& CUDA Toolkit installed on the workstation \\
& or available on the compute nodes of the HPC facility.\\
\texttt{--enable-cuda-env-check=[yes]}& if set, sanity checks on the CUDA environment\\
& are performed (default: no).
\end{tabular}\\
\texttt{--with-cuda-mpi=value} & enable usage of a CUDA-aware MPI library (default: no).\\
\end{tabular}
To modify or extend \configure, see the Wiki pages on GitLab:
\texttt{https://gitlab.com/QEF/q-e/-/wikis}.
For advanced users only!
To modify or extend \configure\ (advanced users only!), see the Wiki pages
on GitLab: \texttt{https://gitlab.com/QEF/q-e/-/wikis}.
\subsubsection{Manual configuration}
\label{SubSec:manconf}
@ -662,8 +670,9 @@ libraries (e.g. you need to add \texttt{-D\_\_FFTW} to \texttt{DFLAGS}
if you want to link internal FFTW). For a correct choice of preprocessing
flags, refer to the documentation in \texttt{include/defs.h.README}.
Even if \configure\ works, yuo may need to tweak the \texttt{make.inc}
file. It is very simple, but please note that if you change any settings
Even if \configure\ works, you may need to tweak the \texttt{make.inc}
file. It is very simple, but please note that a) you must know what you are
doing, and b) if you change any settings
(e.g. preprocessing, compilation flags)
after a previous, successful or failed, compilation, you must run
\texttt{make clean} before recompiling, unless you know exactly which
@ -674,7 +683,7 @@ unless you use option \texttt{--save}.
\subsection{Libraries}
\label{Sec:Libraries}
\qe\ contains a copy of some needed external libraries:
\qe\ downloads a copy of the following external libraries if needed:
\begin{itemize}
\item FoX for reading and writing xml files;
\item BLAS (\texttt{http://www.netlib.org/blas/}) and LAPACK
@ -881,7 +890,7 @@ Several functionals in \libxc\ depend on one or more external parameters. Some o
In order to see the available parameters for a given \libxc\ functional and their corresponding indexes, the \texttt{xc\_infos.x} program is available in \texttt{XClib} folder. For more details see Sec. \ref{SubSec:XCtest}.\\
The two routines can be called almost anywhere in \qe, however, as any other \texttt{XClib} setting routine, they must be declared through the \texttt{xc\_lib} module.\\
Without setting the external parameters inside the code, their default value will be assumed. This could lead to results different from the expectations.\\
In any case, when external parameters are needed by the chosen functionals, a warning message will appear in the output of \qe.
In any case, when external parameters are needed by the chosen functionals, a warning message will appear in the output of \qe. An example of Libxc parameter setting can be found in the \texttt{xclib\_test.f90} code (see below).
%
\paragraph{Functionals with partial output.}
A few \libxc\ functional routines provides the potential but not the energy. These functionals are available in \qe\ for all the families and their output energy is set to zero.
@ -1299,23 +1308,29 @@ paragraph.
\subsubsection{Cray machines}
For Cray XE machines:
\begin{verbatim}
$ module swap PrgEnv-cray PrgEnv-pgi
$ ./configure --enable-openmp --enable-parallel --with-scalapack
\end{verbatim}
For recent Cray machines, use \texttt{./configure ARCH=craype}.
This selects the \texttt{ftn} compiler, that typically uses
the \texttt{crayftn} compiler but may also use other ones,
depending upon the site and personal environment.
''Now, despite what people can imagine, every CRAY machine deployed can
have different environment. For example on the machine I usually use
for tests [...] I do have to unload some modules to make QE running
properly. On another CRAY [...] there is also Intel compiler as option
and the system is slightly different compared to the other.
So my recipe should work, 99\% of the cases.'' (info by Filippo Spiga)
and the system is slightly different compared to the other.''
(info by Filippo Spiga)
For Cray XT machines, use \texttt{./configure ARCH=crayxt4} or else
\configure\ will not recognize the Cray-specific software environment.
With the \verb|PrgEnv-cray| module, v.6.0.10, you get an internal compiler
error in \verb|esm_stres_mod.f90|. If so, reduce the optimization level
for that specific routine from \verb|-O3| to \verb|-O1|.
Older Cray machines: T3D, T3E, X1, are no longer supported.
If you want to use the Intel compiler instead, try something like:
\begin{verbatim}
$ module swap PrgEnv-cray PrgEnv-intel
$ ./configure ARCH=craype [--enable-openmp --enable-parallel --with-scalapack]
\end{verbatim}
Old Cray machines: T3D, T3E, X1, etc, are no longer supported.
\subsubsection{Obsolescent architectures}
@ -1630,6 +1645,15 @@ of output. Running several instances of a serial code with
\texttt{mpirun} or \texttt{mpiexec} produces strange crashes.
\paragraph{Trouble with input files}
Input files should be plain ASCII text. The presence of CRLF line
terminators (may appear as \^{}M, Control-M, characters at the end
of lines), tabulators, or non-ASCII characters (e.g. non-ASCII
quotation marks, that at a first glance may look the same as
the ASCII character) is a frequent source of trouble.
Typically, this happens with files coming from Windows or produced
with "smart" editors. Verify with command \texttt{file} and convert
with command \texttt{iconv} if needed.
Some implementations of the MPI library have problems with input
redirection in parallel. This typically shows up under the form of
mysterious errors when reading data. If this happens, use the option

View File

@ -133,3 +133,9 @@ qe_install_targets(
qe_epw_exe qe_zg_exe qe_zg_disca_exe qe_zg_pp_disca_exe qe_zg_bands_unfold_exe qe_zg_pp_spctrlfn_exe qe_zg_epsilon_Gaus_exe)
install(PROGRAMS bin/pp.py TYPE BIN RENAME epw_pp.py)
add_custom_target(epw
DEPENDS
qe_epw_exe
COMMENT
"electron-Phonon Coupling with wannier functions")

View File

@ -1,4 +1,4 @@
Marios Zacharias [1] & Feliciano Giustino [2,3], November 2021
Marios Zacharias [1] & Feliciano Giustino [2,3], May 2022
[1] Department of Mechanical and Materials Science Engineering, Cyprus University of Technology,
P.O. Box 50329, 3603 Limassol, Cyprus
@ -26,7 +26,7 @@ Acknowledgement: We thank Hyungjun Lee, Oden Institute for Computational Enginee
Executables in ZG folder
------------------------
ZG.x ---> for generating ZG configurations
ZG.x ---> for generating ZG configurations, ZG diffuse scattering, phonon unflolding
bands_unfold.x ---> for performing band structure unfolding in supercell calculations
pp_spctrlfn.x ---> for obtaining the electron spectral function after bands_unfold.x
epsilon_Gaus.x ---> for calculating optical properties as in epsilon.x but Gaussian broadening
@ -34,13 +34,17 @@ disca.x ---> for calculating one-phonon and all-phonon inelastic scatte
pp_disca.x ---> for applying broadening and setting a resolution of scattering patterns
src/local folder ---> for post-processing. Compile them by "./compile_gfortran.sh"
---------------------------------------------------------------------------------------------------
For full instructions on how to run the exercises in the tarball "tutorial.tar.gz"
please refer to the EPW documentation site, or send an email to Marios Zacharias:
zachariasmarios@gmail.com
Links for input flags and tutorials:
https://epwdoc.gitlab.io/source/doc/InputsZG.html
https://epwdoc.gitlab.io/source/doc/TutorialZG.html
---------------------------------------------------------------------------------------------------
Instructions for the construction of the Zacharias-Giustino "ZG" displacement following
Eq. (2) of Phys. Rev. Research 2, 013357, 2020. The approach for generating the ZG-displacement
@ -84,6 +88,10 @@ STEPS for generating the "ZG-displacement" for the calculation of temperature-de
---------------------------------------------------------------------------------------
i) "ZG_conf" : Logical flag that enables the creation of the ZG-displacement.
(default .true.)
"flscf" : String for the name of the scf input file used to calculate the phonons. The
code will read information for preparing the input file of the supercell calculation.
If left empty the code will not generate the input file of the supercell calculation.
(default ' ')
"T" : Real number indicating the temperature at which the calculations will be performed.
"T" essentially defines the amplitude of the normal coordinates.
(default 0.00)
@ -94,6 +102,11 @@ STEPS for generating the "ZG-displacement" for the calculation of temperature-de
(default 0, 0, 0)
"atm_zg(1), etc.." : String describing the element of each atomic species
(default "Element")
"qhat_in" : Vector with three real entries for specifying the direction qhat
for the non-analytic part when dim1=dim2=dim3=1.
Use for example "qhat_in(1) = 0.1, qhat_in(2) =0.0, qhat_in(3) = 0.0"
to account for LO-TO splitting from the direction [1 0 0].
(default 0.1,0.1,0.1)
"synch" : Logical flag that enables the synchronization of the modes.
(default .false.)
"niters" : Integer for the number of iterations the algorithm needs to
@ -127,18 +140,32 @@ STEPS for generating the "ZG-displacement" for the calculation of temperature-de
(default .false.)
"q_external" : Logical flag that allows the use of a q-point list specified by the user in the input file.
If .false. the q-point list is specified by the supercell dimensions dim1, dim2, and dim3.
If .true. the q-point list must be provided by the user (see "qlist_AB.txt").
If .false. any q-point list after the input flags is ignored.
If .true. the q-point list must be provided by the user (or see "qlist_AB.txt").
IF ph_unfold = .true. then q_external = .true. automatically and the q-path is provided as
in a standard phonon dispersion calculation.
(default .false.)
"qlist_AB.txt" : This file contains the external q-list in crystal coordinates as in the "ZG_444.in" example,
after the input flags. It corresponds to the q-points commensurate to the supercell size.
Only one of the q-point time-reversal partners is kept for the construction of the
ZG-displacement. The calculations, for the moment, assume systems with time-reversal symmetry.
For the generation of the "qlist_AB.txt" set the q-gird in file
"example/silicon/ZG_structure/input/qlist.in" and run "../../../src/create_qlist.x < qlist.in > qlist.out".
"example/silicon/input/qlist.in" and run "../../../src/create_qlist.x < qlist.in > qlist.out".
One can modify the "create_qlist.f90" to generate a different path for consecutive q-points.
Paste the output of "qlist_AB.txt" to "ZG.in" after namelist &input. Set the flag
q_external = .true. for the code to read the list.
q_external = .true. for the code to read the list.
"ph_unfold" : Logical flag to activate phonon unfolding procedure. (default: .false.). To perform phonon
unfolding ZG_conf must be set to .false.. If ph_unfold = .true. then q_external = .true.
"flfrq" : Output file for frequencies to printed with unfolding weights (default: 'frequencies.dat')
"flweights" : Output file for unfolding weights to printed with frequncies (default: 'unfold_weights.dat')
"ng1","ng2","ng3" : Integers corresponding to the (h k l) indices of the reciprocal lattice vector g.
Increase their values to check convergence. Default is a good starting point.
(default 10,10,10)
ii) To generate the ZG-displacement run "/path_to_your_espresso/bin/ZG.x <ZG.in> ZG.out".
This generates three output files: the "equil_pos.dat", "ZG-configuration.dat" and "ZG-velocities.dat".
The first file has the equilibrium coordinates of the nuclei and the second has the optimum set of nuclear coordinates

File diff suppressed because it is too large Load Diff

View File

@ -131,8 +131,9 @@ PROGRAM do_bands
no_overlap=.true.
ENDIF
IF (lsym) no_overlap=.true.
IF ( npool > 1 .and..not.(lsym.or.no_overlap)) CALL errore('bands', &
IF ( npool > 1 .and. poors_man ) CALL errore('bands_unfold', &
'pools not implemented',npool)
IF ( npool > 1 .and..not.(lsym.or.no_overlap)) CALL errore('bands_unfold', &
'pools not implemented',npool)
IF ( spin_component < 1 .OR. spin_component > 2 ) &
CALL errore('bands','incorrect spin_component',1)
@ -225,7 +226,7 @@ SUBROUTINE punch_band (filband, spin_component, lsigma, no_overlap,dim1,dim2,dim
!mz_b
logical, intent(in) :: poors_man
INTEGER, intent(in) :: dim1,dim2,dim3
REAL(DP), ALLOCATABLE :: g_mz(:,:), g_mz_or(:,:)
REAL(DP), ALLOCATABLE :: g_mz(:,:)
INTEGER :: ctr,ctr2, kbnd
INTEGER :: i_mz, ig_mz, kkx, kky, kkz
REAL(DP), ALLOCATABLE :: P_mk(:,:), et_mz(:) !!,
@ -297,7 +298,7 @@ SUBROUTINE punch_band (filband, spin_component, lsigma, no_overlap,dim1,dim2,dim
CALL allocate_bec_type(nkb, nbnd, becp_mz)
!
!
IF (poors_man) ALLOCATE(P_mk(nbnd,nks))
IF (poors_man) ALLOCATE(P_mk(nbnd,nkstot))
IF (poors_man) P_mk(:,:) = 0.0d0
! mz_e
DO ik = nks1, nks2
@ -413,14 +414,10 @@ SUBROUTINE punch_band (filband, spin_component, lsigma, no_overlap,dim1,dim2,dim
!
!mz_b
call cryst_to_cart( npw, g, at, -1 ) ! here we convert them to crystal coordinates
ALLOCATE(g_mz(3,ngm),g_mz_or(3,ngm))
ALLOCATE(g_mz(3,ngm))
!
g_mz(:,:) = g(:,:) ! save reciproval lattice vectors G = m1*B1+m2*B2+m3*B3
g_mz(:,:) = g(:,:) ! save reciprocal lattice vectors G = m1*B1+m2*B2+m3*B3
!
!
!
!
!
! Calculate the poor's man spectral weights
IF ( poors_man ) THEN
IF (noncolin) ALLOCATE(evc_new(npol*npwx,nbnd)) ! to compute becp contribution PAW or ultrasof
@ -521,11 +518,12 @@ SUBROUTINE punch_band (filband, spin_component, lsigma, no_overlap,dim1,dim2,dim
ENDIF ! noncol
END DO
! Bring them back to cartesian, for the main loops
call cryst_to_cart( npw, g, bg, 1 )
call cryst_to_cart( npw, g, bg, 1 )
!
!
DEALLOCATE(psi_mz,g_mz,g_mz_or)
DEALLOCATE(psi_mz,g_mz)
ENDDO ! k-loop
!call mp_sum( P_mk, intra_bgrp_comm ) ! collect P_mk
!
!
IF (noncolin) CALL poolrecover(sigma_avg,4*nbnd,nkstot,nks)
@ -536,9 +534,9 @@ SUBROUTINE punch_band (filband, spin_component, lsigma, no_overlap,dim1,dim2,dim
OPEN (unit = 44, file = filename_mz, status = 'unknown', form = &
'formatted', iostat = ios(0))
WRITE (44, '(" &plot nbnd=",i4,", nks=",i6," /")') &
nbnd, nks
DO ik=nks1,nks2
! WRITE(*,*) "vizoula2", ik, nks1, nks2, nks1tot,nks2tot
nbnd, nks2tot-nks1tot+1
DO ik=nks1tot,nks2tot
!WRITE(*,*) "hiiii", ik, nks1, nks2, nks1tot,nks2tot
WRITE (44, '(10x,3f10.6)') dble(xk(1,ik)/dim1), dble(xk(2,ik)/dim2), dble(xk(3,ik)/dim3) !
WRITE (44,'(10f14.6)') ( et(i_mz,ik)*rytoev, i_mz = 1, nbnd) ! write the energies of the supercell E_mK
ENDDO
@ -548,18 +546,15 @@ SUBROUTINE punch_band (filband, spin_component, lsigma, no_overlap,dim1,dim2,dim
filename_mz = 'spectral_weights' // TRIM( pointer_mz ) // '.dat'
OPEN (unit = 25, file = filename_mz, status = 'unknown', form = &
'formatted', iostat = ios(0))
WRITE (25, '(" &plot nbnd=",i4,", nks=",i6," /")') nbnd
DO ik=nks1,nks2
WRITE (25, '(" &plot nbnd=",i4,", nks=",i6," /")') &
nbnd, nks2tot-nks1tot+1
DO ik=nks1tot,nks2tot
! We write the header for the output files and the new band structure
WRITE (25, '(10x,3f10.6)') dble(xk(1, ik) / dim1), dble(xk(2, ik) / dim2),&
dble(xk(3, ik) / dim3) !xk(1,ik)/dim1, xk(2,ik)/dim2, xk(3,ik)/dim3
WRITE (25,'(10f12.6)') ( P_mk(i_mz, ik), i_mz = 1, nbnd)
CLOSE(25)
ENDDO! ik
! Final sum rule
!!DO ibnd = 1, nbnd
!! WRITE(*,*) "sum_rule_completeness", sum(P_mk(ibnd,ik,:))
!!END DO
ENDDO! ik
CLOSE(25)
!
END IF ! poorsman
ENDIF
@ -884,61 +879,6 @@ SUBROUTINE punch_plottable_bands ( filband, nks1tot, nks2tot, nkstot, nbnd, &
RETURN
!
END SUBROUTINE punch_plottable_bands
!cdiagh2 copy from PHonon/PH/rigid.f90
!
subroutine cdiagh2 (n,h,ldh,e,v)
!-----------------------------------------------------------------------
!
! calculates all the eigenvalues and eigenvectors of a complex
! hermitean matrix H . On output, the matrix is unchanged
!
use kinds, only: dp
implicit none
!
! on INPUT
integer n, &! dimension of the matrix to be diagonalized
& ldh ! leading dimension of h, as declared
! in the calling pgm unit
complex(DP) h(ldh,n) ! matrix to be diagonalized
!
! on OUTPUT
real(DP) e(n) ! eigenvalues
complex(DP) v(ldh,n) ! eigenvectors (column-wise)
!
! LOCAL variables (LAPACK version)
!
integer lwork, &! aux. var.
& ILAENV, &! function which gives block size
& nb, &! block size
& info ! flag saying if the exec. of libr. routines was ok
!
real(DP), allocatable:: rwork(:)
complex(DP), allocatable:: work(:)
!
! check for the block size
!
nb = ILAENV( 1, 'ZHETRD', 'U', n, -1, -1, -1 )
if (nb.lt.1) nb=max(1,n)
if (nb.eq.1.or.nb.ge.n) then
lwork=2*n-1
else
lwork = (nb+1)*n
endif
!
! allocate workspace
!
call zcopy(n*ldh,h,1,v,1)
allocate(work (lwork))
allocate(rwork (3*n-2))
call ZHEEV('V','U',n,v,ldh,e,work,lwork,rwork,info)
call errore ('cdiagh2','info =/= 0',abs(info))
! deallocate workspace
deallocate(rwork)
deallocate(work)
!
return
end subroutine cdiagh2
!-----------------------------------------------------------------------
FUNCTION cgracsc_nc (nkb, bec1, bec2, nhm, ntyp, nh, nat, ityp, npol, upf)
!-----------------------------------------------------------------------

View File

@ -1,6 +1,6 @@
program kpoints_unfold
integer :: nk,sdim
integer :: nk,sdim1,sdim2,sdim3
! counter
integer :: i, y_n
DOUBLE PRECISION, ALLOCATABLE :: kpts_mat(:,:),v2(:), v(:)
@ -15,13 +15,6 @@ program kpoints_unfold
read(*,*) kpts_mat(i,:)
ENDDO
!
!
!
WRITE(*,*) "New high-sym kpts after Symmetry operation:"
DO i=1,nk
WRITE(*,*) kpts_mat(i,:)
ENDDO
!
!
WRITE(*,*) "Write x-positions of high-sym kpts"
ALLOCATE(v2(nk))
@ -40,12 +33,14 @@ program kpoints_unfold
v(i)=(v2(i+1)-v2(i))/step
ENDDO
v(nk)=1 ! this should be always one because is the last one
WRITE(*,*) "Supercell size ?"
read(*,*) sdim
WRITE(*,*) "Supercell size (n * m * p) ?"
read(*,*) sdim1, sdim2, sdim3
!
!
WRITE(*,*) "kpts to use for Supercell calculation:"
kpts_mat(:,:)=sdim*kpts_mat(:,:)
kpts_mat(:,1)=sdim1*kpts_mat(:,1)
kpts_mat(:,2)=sdim2*kpts_mat(:,2)
kpts_mat(:,3)=sdim3*kpts_mat(:,3)
DO i=1,nk
WRITE(*,'(3F11.6,I4)') kpts_mat(i,:), abs(nint(v(i)))
ENDDO

Binary file not shown.

View File

@ -31,14 +31,24 @@
etf_mem, epwwrite, epwread, nbndsub, fermi_plot, &
eps_acustic, ephwrite, epbread, nsiter, nqstep, &
nqsmear, nqf3, nqf2, nqf1, nkf3, nkf2, nkf1, &
muc, mp_mesh_q, mp_mesh_k, max_memlt, lunif, &
!!!!!
!muc, mp_mesh_q, mp_mesh_k, max_memlt, lunif, &
muc, mp_mesh_q, mp_mesh_k, max_memlt, &
!!!!!
lreal, lpolar, lpade, liso, limag, laniso, npade, &
specfun_el, specfun_ph, lifc, asr_typ, &
lscreen, scr_typ, fermi_diff, smear_rpa, &
rand_q, rand_nq, rand_nk, rand_k, pwc, phonselfen,&
!!!!!
!rand_q, rand_nq, rand_nk, rand_k, pwc, phonselfen,&
rand_q, rand_nq, rand_nk, rand_k, phonselfen, &
!!!!!
specfun_pl, cumulant, bnd_cum, iterative_bte, &
nw_specfun, nw, nswi, nswfc, nswc, nstemp, nsmear,&
wsfc, wscut, write_wfn, wmin_specfun, wmin, &
!!!!!
!nw_specfun, nw, nswi, nswfc, nswc, nstemp, nsmear,&
!wsfc, wscut, write_wfn, wmin_specfun, wmin, &
nw_specfun, nw, nswi, nstemp, nsmear, &
wscut, write_wfn, wmin_specfun, wmin, &
!!!!!
wmax_specfun, wmax, wepexst, wannierize, &
vme, longrange, shortrange, system_2d, lindabs, &
temps, tempsmin, tempsmax, delta_approx, title, &
@ -55,9 +65,10 @@
epw_crysym, bfieldx, bfieldy, bfieldz, tc_linear, &
!!!!!
!tc_linear_solver, mob_maxfreq, mob_nfreq
tc_linear_solver, mob_maxfreq, mob_nfreq, ii_g, &
ii_charge, ii_n, ii_scattering, ii_only, &
ii_lscreen, ii_eda, ii_partion, ii_eps0
ii_g, ii_charge, ii_n, ii_scattering, ii_only, &
ii_lscreen, ii_eda, ii_partion, ii_eps0, &
tc_linear_solver, mob_maxfreq, mob_nfreq, &
fbw, gridsamp, griddens, dos_del, muchem
!!!!!
USE elph2, ONLY : elph
USE mp, ONLY : mp_bcast
@ -126,11 +137,17 @@
CALL mp_bcast(laniso , meta_ionode_id, world_comm)
CALL mp_bcast(tc_linear , meta_ionode_id, world_comm)
CALL mp_bcast(tc_linear_solver, meta_ionode_id, world_comm)
!!!!!
CALL mp_bcast(fbw , meta_ionode_id, world_comm)
CALL mp_bcast(gridsamp , meta_ionode_id, world_comm)
CALL mp_bcast(griddens , meta_ionode_id, world_comm)
CALL mp_bcast(dos_del , meta_ionode_id, world_comm)
CALL mp_bcast(muchem , meta_ionode_id, world_comm)
!!!!!
CALL mp_bcast(lpolar , meta_ionode_id, world_comm)
CALL mp_bcast(lifc , meta_ionode_id, world_comm)
CALL mp_bcast(lscreen , meta_ionode_id, world_comm)
CALL mp_bcast(cumulant , meta_ionode_id, world_comm)
CALL mp_bcast(lunif , meta_ionode_id, world_comm)
CALL mp_bcast(kerwrite , meta_ionode_id, world_comm)
CALL mp_bcast(kerread , meta_ionode_id, world_comm)
CALL mp_bcast(imag_read , meta_ionode_id, world_comm)
@ -199,8 +216,6 @@
CALL mp_bcast(nqf3 , meta_ionode_id, world_comm)
CALL mp_bcast(nqsmear , meta_ionode_id, world_comm)
CALL mp_bcast(nqstep , meta_ionode_id, world_comm)
CALL mp_bcast(nswfc , meta_ionode_id, world_comm)
CALL mp_bcast(nswc , meta_ionode_id, world_comm)
CALL mp_bcast(nswi , meta_ionode_id, world_comm)
CALL mp_bcast(broyden_ndim, meta_ionode_id, world_comm)
CALL mp_bcast(nstemp , meta_ionode_id, world_comm)
@ -225,8 +240,10 @@
CALL mp_bcast(eps_acustic , meta_ionode_id, world_comm)
CALL mp_bcast(degaussq , meta_ionode_id, world_comm)
CALL mp_bcast(delta_qsmear , meta_ionode_id, world_comm)
CALL mp_bcast(pwc , meta_ionode_id, world_comm)
CALL mp_bcast(wsfc , meta_ionode_id, world_comm)
!!!!!
! CALL mp_bcast(pwc , meta_ionode_id, world_comm)
! CALL mp_bcast(wsfc , meta_ionode_id, world_comm)
!!!!!
CALL mp_bcast(wscut , meta_ionode_id, world_comm)
CALL mp_bcast(broyden_beta , meta_ionode_id, world_comm)
CALL mp_bcast(tempsmin , meta_ionode_id, world_comm)

View File

@ -133,7 +133,10 @@
!! @ Note:
!! If you have 19 kpts and 2 pool, this routine will return
!! lower_bnd= 1 and upper_bnd=10 for the first pool
!! lower_bnd= 1 and upper_bnd=9 for the second pool
!!!!! a comment line is replaced with another one!
! !! lower_bnd= 1 and upper_bnd=9 for the second pool
!! lower_bnd=11 and upper_bnd=19 for the second pool
!!!!!
!-----------------------------------------------------------------------
!
USE mp_global, ONLY : my_pool_id, npool
@ -194,7 +197,10 @@
!! @ Note:
!! If you have 19 kpts and 2 pool, this routine will return
!! lower_bnd= 1 and upper_bnd=10 for the first pool
!! lower_bnd= 1 and upper_bnd=9 for the second pool
!!!!! a comment line is replaced with another one!
! !! lower_bnd= 1 and upper_bnd=9 for the second pool
!! lower_bnd=11 and upper_bnd=19 for the second pool
!!!!!
!-----------------------------------------------------------------------
!
USE mp_global, ONLY : my_pool_id, npool

View File

@ -60,6 +60,8 @@
USE xc_lib, ONLY : xclib_dft_is
USE elph2, ONLY : lower_band, upper_band, ibndstart
USE constants_epw, ONLY : czero, eps12
USE Coul_cut_2D, ONLY : do_cutoff_2D
USE Coul_cut_2D_ph, ONLY : cutoff_localq
!
IMPLICIT NONE
!
@ -161,6 +163,10 @@
gu = gu0 + g(1, ig) * u1 + g(2, ig) * u2 + g(3, ig) * u3
aux1(dffts%nl(ig)) = aux1(dffts%nl(ig)) + vlocq(ig, nt) * gu * fact * gtau
ENDDO
IF (do_cutoff_2D) THEN
CALL cutoff_localq(aux1, fact, u1, u2, u3, gu0, nt, na)
ENDIF
!
ENDIF
ENDDO
!
@ -599,6 +605,7 @@
!!
!! Roxana Margine - Dec 2018: Updated based on QE 6.3
!! SP: Sept. 2019 - Cleaning
!! SP: Jan. 2022 - Addition 2D Coulomb
!!
!! HL: Mar 2020 - Parallelization over G using intra image communicator
!!
@ -620,6 +627,8 @@
USE constants_epw, ONLY : zero, czero
USE mp_images, ONLY : intra_image_comm
USE elph2, ONLY : veff, ig_s, ig_e
USE Coul_cut_2D, ONLY : do_cutoff_2D
USE Coul_cut_2D_ph, ONLY : lr_Vlocq
!
IMPLICIT NONE
!
@ -771,12 +780,22 @@
! nb is the atom of the augmentation function
!
nta = ityp(na)
DO ig = 1, ngvec
sk(ig) = vlocq(ig + ig_s - 1, nta) &
* eigts1(mill(1, ig + ig_s - 1), na) &
* eigts2(mill(2, ig + ig_s - 1), na) &
* eigts3(mill(3, ig + ig_s - 1), na)
ENDDO
!
IF (do_cutoff_2D) THEN
DO ig = 1, ngvec
sk(ig) = (vlocq(ig + ig_s - 1, nta) + lr_Vlocq (ig + ig_s - 1, nta)) &
* eigts1(mill(1, ig + ig_s - 1), na) &
* eigts2(mill(2, ig + ig_s - 1), na) &
* eigts3(mill(3, ig + ig_s - 1), na)
ENDDO
ELSE
DO ig = 1, ngvec
sk(ig) = vlocq(ig + ig_s - 1, nta) &
* eigts1(mill(1, ig + ig_s - 1), na) &
* eigts2(mill(2, ig + ig_s - 1), na) &
* eigts3(mill(3, ig + ig_s - 1), na)
ENDDO
ENDIF
!
DO ipol = 1, 3
DO ig = 1, ngvec

View File

@ -14,10 +14,10 @@
!!
USE io_global, ONLY : stdout
USE epwcom, ONLY : liso, fila2f, gap_edge, lreal, limag, laniso, &
tc_linear
tc_linear, fbw
USE eliashbergcom, ONLY : gap0
USE supercond, ONLY : eliashberg_init, evaluate_a2f_lambda, &
estimate_tc_gap, deallocate_eliashberg_elphon
USE supercond, ONLY : eliashberg_init, estimate_tc_gap, find_a2f, &
deallocate_eliashberg_elphon
USE io_eliashberg, ONLY : read_a2f, read_frequencies, read_eigenvalues, &
read_ephmat, read_kqmap
USE supercond_iso, ONLY : eliashberg_iso_iaxis, eliashberg_iso_raxis, &
@ -30,7 +30,11 @@
!
IF (liso) THEN
WRITE(stdout, '(/5x, a)') REPEAT('=', 67)
WRITE(stdout, '(5x, "Solve isotropic Eliashberg equations")')
IF (fbw) THEN
WRITE(stdout, '(5x, "Solve full-bandwidth isotropic Eliashberg equations")')
ELSE
WRITE(stdout, '(5x, "Solve isotropic Eliashberg equations")')
ENDIF
WRITE(stdout, '(5x, a/)') REPEAT('=', 67)
CALL eliashberg_init()
IF (fila2f == ' ') THEN
@ -38,7 +42,7 @@
CALL read_eigenvalues()
CALL read_kqmap()
CALL read_ephmat()
CALL evaluate_a2f_lambda()
CALL find_a2f()
CALL deallocate_eliashberg_elphon()
ENDIF
!
@ -55,14 +59,19 @@
!
IF (laniso) THEN
WRITE(stdout, '(/5x, a)') REPEAT('=', 67)
WRITE(stdout, '(5x, "Solve anisotropic Eliashberg equations")')
IF (fbw) THEN
WRITE(stdout, '(5x, "Solve full-bandwidth anisotropic Eliashberg equations")')
ELSE
WRITE(stdout, '(5x, "Solve anisotropic Eliashberg equations")')
ENDIF
WRITE(stdout, '(5x, a/)') REPEAT('=', 67)
CALL eliashberg_init()
CALL read_frequencies()
CALL read_eigenvalues()
CALL read_kqmap()
CALL read_ephmat()
CALL evaluate_a2f_lambda()
CALL find_a2f()
CALL read_a2f()
CALL estimate_tc_gap()
IF (gap_edge > 0.d0) THEN
gap0 = gap_edge
@ -79,7 +88,8 @@
CALL read_eigenvalues()
CALL read_kqmap()
CALL read_ephmat()
CALL evaluate_a2f_lambda()
CALL find_a2f()
CALL read_a2f()
CALL estimate_tc_gap()
CALL deallocate_eliashberg_elphon()
ENDIF

View File

@ -19,8 +19,12 @@
!
INTEGER :: nsw
!! Nr. of grid points between (0,wscut) for real-axis, analytical continuation and Pade approximants
INTEGER :: ndos
!! Nr. of energy bins in Fermi window for dos
INTEGER, ALLOCATABLE :: nsiw(:)
!! Nr of grid points at each temperature on imag-axis, nsiw(nstemp)
!! Nr. of grid points at each temperature on imag-axis, nsiw(nstemp)
INTEGER, ALLOCATABLE :: wsn(:)
!! frequency "indices" on imag-axis at iw, wsn(nsiw(nstemp))
!
REAL(KIND = DP) :: wsphmax
!! maximum phonon frequency for evaluation of the integral over Omega (0, wsphmax)
@ -28,14 +32,18 @@
!! frequency step for Eliashberg spectral function
REAL(KIND = DP) :: gap0
!! initial guess for delta
REAL(KIND = DP), ALLOCATABLE :: dws(:)
!! grid size at each bin dws(nsw)
REAL(KIND = DP) :: muintr
!! superconducting (interacting) chemical potential
REAL(KIND = DP), ALLOCATABLE :: ws(:)
!! frequency on real-axis, ws(nsw)
REAL(KIND = DP), ALLOCATABLE :: wsph(:)
!! frequency on real-axis, wsph(nqstep)
REAL(KIND = DP), ALLOCATABLE :: wsi(:)
!! frequency on imag-axis at iw, wi(nsiw(nstemp))
REAL(KIND = DP), ALLOCATABLE :: en(:)
!! Energy grid over Fermi window
REAL(KIND = DP), ALLOCATABLE :: dosen(:)
!! DOS (state/spin/eV/u.c.) over Fermi window
!
!--------------------------------------------------------------------------
END MODULE eliashberg_common
@ -53,8 +61,6 @@
!
REAL(KIND = DP), ALLOCATABLE :: a2f_iso(:)
!! isotropic Eliashberg spectral function a2f_iso(nqstep)
REAL(KIND = DP), ALLOCATABLE :: gap(:)
!! superconducting gap edge gap(nstemp)
REAL(KIND = DP), ALLOCATABLE :: fdwp(:)
!! Fermi-Dirac distribution at frequency wp, fdwp(nsw)
REAL(KIND = DP), ALLOCATABLE :: bewph(:)
@ -77,6 +83,14 @@
!! -bose(omegap)-fermi( omega+omegap) (eqn for delta and znorm analytic continuation)
REAL(KIND = DP), ALLOCATABLE :: gm(:, :)
!! bose(omegap)+fermi(-omega+omegap) (eqn for delta and znorm analytic continuation)
REAL(KIND = DP), ALLOCATABLE :: znormip(:)
!! renormalization function on imag-axis at iwp, znormip(nsiw(nstemp))
REAL(KIND = DP), ALLOCATABLE :: shifti(:)
!! energy shift on imag-axis at iw, shifti(nsiw(nstemp))
REAL(KIND = DP), ALLOCATABLE :: shiftip(:)
!! energy shift on imag-axis at iwp, shiftip(nsiw(nstemp))
REAL(KIND = DP), ALLOCATABLE :: orderi(:)
!! order paramter on the imag-axis at iw, orderi(nsiw(nstemp))
!
COMPLEX(KIND = DP), ALLOCATABLE :: delta(:)
!! gap function on real-axis at iw
@ -90,6 +104,8 @@
!! phonon kernel on real-axis (eqn for delta)
COMPLEX(KIND = DP), ALLOCATABLE :: km(:, :)
!! phonon kernel on real-axis (eqn for znorm)
COMPLEX(KIND = DP), ALLOCATABLE :: shift(:)
!! energy shift on real-axis at iw
!
!--------------------------------------------------------------------------
END MODULE eliashberg_common_iso
@ -141,36 +157,44 @@
REAL(KIND = DP), ALLOCATABLE :: wkfs(:)
!! weights of the irreducible k-points wkf(nkfs)
REAL(KIND = DP), ALLOCATABLE :: a2fij(:, :, :, :, :)
!! spectral function a2fij(nkfs_pool,nqftot,nbndfs,nbndfs,nqstep)
!! spectral function a2fij(nqstep,nbndfs,nqftot,nbndfs,nkfs_pool)
REAL(KIND = DP), ALLOCATABLE :: w0g(:, :)
!! approximation for delta function w0g(nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: agap(:, :, :)
!! superconducting gap edge agap(nkfs,nbndfs,nstemp)
REAL(KIND = DP), ALLOCATABLE :: agap(:, :)
!! superconducting gap edge agap(nkfs,nbndfs)
REAL(KIND = DP), ALLOCATABLE :: adeltai(:, :, :)
!! gap function on imag-axis at iw, adeltai(nbndfs,nkfs,nsiw(nstemp))
!! gap function on imag-axis at iw, adeltai(nsiw(itemp),nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: adeltaip(:, :, :)
!! gap function on imag-axis at iwp, adeltaip(nbndfs,nkfs,nsiw(nstemp))
!! gap function on imag-axis at iwp, adeltaip(nsiw(itemp),nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: aznormi(:, :, :)
!! renormalization function on imag-axis at iw, aznormi(nbndfs,nkfs,nsiw(nstemp))
!! renormalization function on imag-axis at iw, aznormi(nsiw(itemp),nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: naznormi(:, :, :)
!! normal state renormalization function on imag-axis at iw, naznormi(nbndfs,nkfs,nsiw(nstemp))
!! normal state renormalization function on imag-axis at iw, naznormi(nsiw(itemp),nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: akeri(:, :, :, :, :)
!! phonon kernel on imag-axis, akeri(nkfs,nqftot,nbndfs,nbndfs,2*nsiw(nstemp))
!! phonon kernel on imag-axis, akeri(2*nsiw(nstemp),nbndfs,nqftot,nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: adsumi(:, :, :)
!! contribution to delta eqn from the imaginary-axis in the analytic continuation adsumi(nbndfs,nkfs,nsw)
!! contribution to delta eqn from the imaginary-axis in the analytic continuation adsumi(nsw,nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: azsumi(:, :, :)
!! contribution to znorm eqn from the imaginary-axis in the analytic continuation azsumi(nbndfs,nkfs,nsw)
!! contribution to znorm eqn from the imaginary-axis in the analytic continuation azsumi(nsw,nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: memlt_pool(:)
!! maximum allocatable memory per pool
REAL(KIND = DP), ALLOCATABLE :: aznormip(:, :, :)
!! renormalization function on imag-axis at iwp, aznormip(nsiw(itemp),nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: ashifti(:, :, :)
!! energy shift on imag-axis at iw, ashifti(nsiw(itemp),nbndfs,nkfs)
REAL(KIND = DP), ALLOCATABLE :: ashiftip(:, :, :)
!! energy shift on imag-axis at iwp, ashiftip(nsiw(itemp),nbndfs,nkfs)
!
COMPLEX(KIND = DP), ALLOCATABLE :: aznorm(:, :, :)
!! renormalization function on real-axis aznorm(nbndfs,nkfs,nsw)
!! renormalization function on real-axis aznorm(nsw,nbndfs,nkfs)
COMPLEX(KIND = DP), ALLOCATABLE :: aznormp(:, :, :)
!! renormalization function on real-axis aznormkq(nbndfs,nkfs,nsw)
!! renormalization function on real-axis aznormkq(nsw,nbndfs,nkfs)
COMPLEX(KIND = DP), ALLOCATABLE :: adelta(:, :, :)
!! gap function on real-axis adelta(nbndfs,nkfs,nsw)
!! gap function on real-axis adelta(nsw,nbndfs,nkfs)
COMPLEX(KIND = DP), ALLOCATABLE :: adeltap(:, :, :)
!! gap function on real-axis adeltap(nbndfs,nkfs,nsw)
!! gap function on real-axis adeltap(nsw,nbndfs,nkfs)
COMPLEX(KIND = DP), ALLOCATABLE :: ashift(:, :, :)
!! energy shift on real-axis ashift(nsw,nbndfs,nkfs)
!
!--------------------------------------------------------------------------
END MODULE eliashberg_common_aniso

View File

@ -265,32 +265,39 @@
!
! Read in external electronic eigenvalues. e.g. GW
!
ALLOCATE(et_ks(nbnd, nks), STAT = ierr)
IF (ierr /= 0) CALL errore('elphon_shuffle_wrap', 'Error allocating et_ks', 1)
et_ks(:, :) = zero
IF (eig_read) THEN
IF (meta_ionode) THEN
WRITE(stdout, '(5x, a, i5, a, i5, a)') "Reading external electronic eigenvalues (", &
nbnd, ",", nkstot,")"
tempfile = TRIM(prefix) // '.eig'
OPEN(iuqpeig, FILE = tempfile, FORM = 'formatted', ACTION = 'read', IOSTAT = ios)
IF (ios /= 0) CALL errore('elphon_shuffle_wrap', 'error opening' // tempfile, 1)
READ(iuqpeig, '(a)') line
DO ik = 1, nkstot
! We do not save the k-point for the moment ==> should be read and
! tested against the current one
IF (.NOT. epbread .AND. .NOT. epwread) THEN
ALLOCATE(et_ks(nbnd, nks), STAT = ierr)
IF (ierr /= 0) CALL errore('elphon_shuffle_wrap', 'Error allocating et_ks', 1)
et_ks(:, :) = zero
IF (eig_read) THEN
IF (meta_ionode) THEN
WRITE(stdout, '(5x, a, i5, a, i5, a)') "Reading external electronic eigenvalues (", &
nbnd, ",", nkstot,")"
tempfile = TRIM(prefix) // '.eig'
OPEN(iuqpeig, FILE = tempfile, FORM = 'formatted', ACTION = 'read', IOSTAT = ios)
IF (ios /= 0) CALL errore('elphon_shuffle_wrap', 'error opening' // tempfile, 1)
READ(iuqpeig, '(a)') line
READ(iuqpeig, *) et_tmp(:, ik)
ENDDO
CLOSE(iuqpeig)
! from eV to Ryd
et_tmp = et_tmp / ryd2ev
DO ik = 1, nkstot
! We do not save the k-point for the moment ==> should be read and
! tested against the current one
READ(iuqpeig, '(a)') line
READ(iuqpeig, *) et_tmp(:, ik)
ENDDO
CLOSE(iuqpeig)
! from eV to Ryd
et_tmp = et_tmp / ryd2ev
ENDIF
CALL mp_bcast(et_tmp, meta_ionode_id, world_comm)
!
CALL fkbounds(nkstot, ik_start, ik_stop)
et_ks(:, :) = et_loc(:, :)
et_loc(:, :) = et_tmp(:, ik_start:ik_stop)
ENDIF
CALL mp_bcast(et_tmp, meta_ionode_id, world_comm)
!
CALL fkbounds(nkstot, ik_start, ik_stop)
et_ks(:, :) = et_loc(:, :)
et_loc(:, :) = et_tmp(:, ik_start:ik_stop)
ELSE
! if starting from epwread, do not need to get external eigs from file.
! allocate zero sized array so no issues with deallocation at end of execution
ALLOCATE(et_ks(0, 0), STAT = ierr)
IF (ierr /= 0) CALL errore('elphon_shuffle_wrap', 'Error allocating et_ks', 1)
ENDIF
!
! gather electronic eigenvalues for subsequent shuffle

View File

@ -77,7 +77,10 @@
ephbloch2wanp_mem
USE wigner, ONLY : wigner_seitz_wrap
USE io_eliashberg, ONLY : write_ephmat, count_kpoints, kmesh_fine, kqmap_fine,&
check_restart_ephwrite
!!!!!
!check_restart_ephwrite
check_restart_ephwrite, write_dos, write_phdos
!!!!!
USE transport, ONLY : transport_coeffs, scattering_rate_q
USE grid, ONLY : qwindow, loadkmesh_fst, xqf_otf
USE printing, ONLY : print_gkk, plot_band, plot_fermisurface
@ -1820,6 +1823,15 @@
ENDDO ! itempphen
ENDIF
ENDIF
!!!!!
!
! SH: Write the electronic and phonon dos files
IF ((.NOT. band_plot) .AND. eliashberg) THEN
CALL write_dos(ef, nelec)
CALL write_phdos()
ENDIF
!
!!!!!
IF (band_plot) CALL plot_band()
!
IF (fermi_plot) CALL plot_fermisurface()

View File

@ -70,7 +70,10 @@
ephbloch2wanp_mem
USE wigner, ONLY : wigner_seitz_wrap
USE io_eliashberg, ONLY : write_ephmat, count_kpoints, kmesh_fine,kqmap_fine, &
check_restart_ephwrite
!!!!!
!check_restart_ephwrite
check_restart_ephwrite, write_dos, write_phdos
!!!!!
USE transport, ONLY : transport_coeffs, scattering_rate_q
USE grid, ONLY : qwindow
USE printing, ONLY : print_gkk, plot_band, plot_fermisurface
@ -1482,6 +1485,15 @@
ENDDO ! itempphen
ENDIF
ENDIF
!!!!!
!
! SH: Write the electronic and phonon dos files
IF ((.NOT. band_plot) .AND. eliashberg) THEN
CALL write_dos(ef, nelec)
CALL write_phdos()
ENDIF
!
!!!!!
IF (band_plot) CALL plot_band()
!
IF (fermi_plot) CALL plot_fermisurface()

View File

@ -10,7 +10,7 @@
PROGRAM epw
!-----------------------------------------------------------------------
!! author: Samuel Ponce', Roxana Margine, Carla Verdi, Feliciano Giustino
!! version: v5.4.1
!! version: v5.5
!! license: GNU
!! summary: EPW main driver
!!
@ -21,7 +21,7 @@
USE mp, ONLY : mp_bcast, mp_barrier
USE mp_world, ONLY : mpime
USE mp_global, ONLY : mp_startup, ionode_id, mp_global_end
USE control_flags, ONLY : gamma_only
USE control_flags, ONLY : gamma_only, use_gpu
USE control_epw, ONLY : wannierize
USE global_version, ONLY : version_number
USE epwcom, ONLY : filukk, eliashberg, ep_coupling, epwread, epbread, cumulant
@ -36,15 +36,18 @@
IMPLICIT NONE
!
CHARACTER(LEN = 12) :: code = 'EPW'
LOGICAL,EXTERNAL :: check_gpu_support
!! Name of the program
!
version_number = '5.4.1'
version_number = '5.5'
!
CALL init_clocks(.TRUE.)
!
CALL start_clock('EPW')
!
gamma_only = .FALSE.
use_gpu = check_gpu_support()
IF(use_gpu) Call errore('EPW', 'EPW with GPU NYI', 1)
!
CALL mp_startup(start_images = .TRUE.)
!

View File

@ -48,6 +48,8 @@
USE poolgathering, ONLY : poolgather_int, poolgather_int1
USE io_epw, ONLY : readwfc
USE dvqpsi, ONLY : dvanqq2
USE Coul_cut_2D, ONLY : do_cutoff_2D
USE Coul_cut_2D_ph, ONLY : cutoff_lr_Vlocq, cutoff_fact_qg
USE scf, ONLY : v, vltot
USE fft_base, ONLY : dfftp
USE fft_interfaces, ONLY : fwfft
@ -172,6 +174,19 @@
!
END DO
!
! From PHonon/PH/phq_init.f90
! SP: For 2d calculations, we need to initialize the fact for the q+G
! component of the cutoff of the Coulomb interaction
IF (do_cutoff_2D) call cutoff_fact_qg()
!
! In 2D calculations the long range part of vlocq(g) (erf/r part)
! was not re-added in g-space because everything is caclulated in
! radial coordinates, which is not compatible with 2D cutoff.
! It will be re-added each time vlocq(g) is used in the code.
! Here, this cutoff long-range part of vlocq(g) is computed only once
! by the routine below and stored
IF (do_cutoff_2D) call cutoff_lr_Vlocq()
!
IF (first_run) THEN
ALLOCATE(igk_k_all(npwx, nkstot), STAT = ierr)
IF (ierr /= 0) CALL errore('epw_init', 'Error allocating igk_k_all', 1)

View File

@ -40,10 +40,16 @@
wmax, wmin, mp_mesh_q, mp_mesh_k, filqf, filkf, nswi, nc, &
delta_qsmear, degaussq, band_plot, ephwrite, nstemp, &
broyden_beta, conv_thr_raxis, temps, tempsmin, tempsmax, &
broyden_ndim, wscut, wsfc, nqstep, limag, lreal, muc, &
gap_edge, conv_thr_iaxis, nqsmear, iprint, wepexst, nswfc, &
epwread, eliashberg, imag_read, kerread, kerwrite, lunif, &
fermi_energy, efermi_read, max_memlt, fila2f, pwc, nswc, &
!!!!!
! broyden_ndim, wscut, wsfc, nqstep, limag, lreal, muc, &
! gap_edge, conv_thr_iaxis, nqsmear, iprint, wepexst, nswfc, &
! epwread, eliashberg, imag_read, kerread, kerwrite, lunif, &
! fermi_energy, efermi_read, max_memlt, fila2f, pwc, nswc, &
broyden_ndim, wscut, nqstep, limag, lreal, muc, &
gap_edge, conv_thr_iaxis, nqsmear, iprint, wepexst, &
epwread, eliashberg, imag_read, kerread, kerwrite, &
fermi_energy, efermi_read, max_memlt, fila2f, &
!!!!!
ep_coupling, nw_specfun, wmax_specfun, wmin_specfun, &
laniso, lpolar, lifc, asr_typ, lscreen, scr_typ, nbndsub, &
fermi_diff, smear_rpa, cumulant, bnd_cum, proj, write_wfn, &
@ -60,13 +66,13 @@
wannier_plot_supercell, wannier_plot_scale, reduce_unk, &
wannier_plot_radius, fermi_plot, fixsym, epw_no_t_rev, &
epw_tr, epw_nosym, epw_noinv, epw_crysym, &
bfieldx, bfieldy, bfieldz, tc_linear, tc_linear_solver, &
!!!!!
!mob_maxfreq, mob_nfreq
mob_maxfreq, mob_nfreq, ii_g, ii_charge, ii_n, &
ii_scattering, ii_only, ii_lscreen, ii_eda, ii_partion, &
ii_eps0
! bfieldx, bfieldy, bfieldz, tc_linear, tc_linear_solver, &
bfieldx, bfieldy, bfieldz, &
ii_g, ii_charge, ii_n, ii_scattering, ii_only, ii_lscreen, &
ii_eda, ii_partion, ii_eps0, &
!!!!!
mob_maxfreq, mob_nfreq
USE klist_epw, ONLY : xk_all, xk_loc, xk_cryst, isk_all, isk_loc, et_all, et_loc
USE elph2, ONLY : elph, num_wannier_plot, wanplotlist, gtemp
USE constants_epw, ONLY : ryd2mev, ryd2ev, ev2cmm1, kelvin2eV, zero, eps20, ang2m
@ -99,6 +105,12 @@
sigma_plrn, ethr_Plrn, full_diagon_plrn, mixing_Plrn, &
init_plrn_wf, niterPlrn, nDOS_plrn, emax_plrn, emin_plrn, &
sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn
!!!!!
!-------------------------------------------------------------------------------------
! SH: Added for tc linearized equation, sparce sampling, and full-bandwidth calculations
USE epwcom, ONLY : gridsamp, griddens, tc_linear, tc_linear_solver, fbw, &
dos_del, muchem
!!!!!
! -------------------------------------------------------------------------------------
!
IMPLICIT NONE
@ -143,20 +155,33 @@
!
NAMELIST / inputepw / &
amass, outdir, prefix, iverbosity, fildvscf, rand_q, rand_nq, rand_k, &
elph, nq1, nq2, nq3, nk1, nk2, nk3, nbndsub, rand_nk, specfun_pl, nswc, &
filukk, epbread, epbwrite, epwread, epwwrite, etf_mem, nswfc, &
!!!!!
! elph, nq1, nq2, nq3, nk1, nk2, nk3, nbndsub, rand_nk, specfun_pl, nswc, &
! filukk, epbread, epbwrite, epwread, epwwrite, etf_mem, nswfc, &
elph, nq1, nq2, nq3, nk1, nk2, nk3, nbndsub, rand_nk, specfun_pl, &
filukk, epbread, epbwrite, epwread, epwwrite, etf_mem, &
!!!!!
eig_read, wepexst, epexst, vme, elecselfen, phonselfen, use_ws, nc, &
degaussw, fsthick, nsmear, delta_smear, nqf1, nqf2, nqf3, nkf1, nkf2, &
dvscf_dir, ngaussw, epmatkqread, selecqread, nkf3, mp_mesh_k, mp_mesh_q,&
wannierize, dis_win_max, dis_win_min, dis_froz_min, dis_froz_max, nswi, &
num_iter, proj, bands_skipped, wdata, iprint, write_wfn, ephwrite, &
wmin, wmax, nw, eps_acustic, a2f, nest_fn, plselfen, filqf, filkf, &
band_plot, fermi_plot, degaussq, delta_qsmear, nqsmear, nqstep, pwc, &
!!!!!
! band_plot, fermi_plot, degaussq, delta_qsmear, nqsmear, nqstep, pwc, &
band_plot, fermi_plot, degaussq, delta_qsmear, nqsmear, nqstep, &
!!!!!
broyden_beta, broyden_ndim, nstemp, temps, bfieldx, bfieldy, bfieldz, &
conv_thr_raxis, conv_thr_iaxis, conv_thr_racon, wsfc, wscut, system_2d, &
!!!!!
! conv_thr_raxis, conv_thr_iaxis, conv_thr_racon, wsfc, wscut, system_2d, &
conv_thr_raxis, conv_thr_iaxis, conv_thr_racon, wscut, system_2d, &
!!!!!
gap_edge, nsiter, muc, lreal, limag, lpade, lacon, liso, laniso, lpolar,&
npade, lscreen, scr_typ, fermi_diff, smear_rpa, cumulant, bnd_cum, &
lifc, asr_typ, lunif, kerwrite, kerread, imag_read, eliashberg, &
!!!!!
! lifc, asr_typ, lunif, kerwrite, kerread, imag_read, eliashberg, &
lifc, asr_typ, kerwrite, kerread, imag_read, eliashberg, &
!!!!!
ep_coupling, fila2f, max_memlt, efermi_read, fermi_energy, &
specfun_el, specfun_ph, wmin_specfun, wmax_specfun, nw_specfun, &
delta_approx, scattering, int_mob, scissor, ncarrier, carrier, &
@ -167,11 +192,12 @@
scdm_sigma, assume_metal, wannier_plot, wannier_plot_list, reduce_unk, &
wannier_plot_supercell, wannier_plot_scale, wannier_plot_radius, &
fixsym, epw_no_t_rev, epw_tr, epw_nosym, epw_noinv, epw_crysym, &
tc_linear, tc_linear_solver, mob_maxfreq, mob_nfreq, &
!!!!!
ii_g, ii_charge, ii_n, ii_scattering, ii_only, ii_lscreen, ii_eda, &
! tc_linear, tc_linear_solver, mob_maxfreq, mob_nfreq, &
mob_maxfreq, mob_nfreq, &
ii_g, ii_charge, ii_n, ii_scattering, ii_only, ii_lscreen, ii_eda, &
ii_partion, ii_eps0, &
!!!!!
!!!!!
!---------------------------------------------------------------------------------
! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
! Shell implementation for future use.
@ -181,7 +207,13 @@
phonon_dos, diag_mode, restart_polaron_mode, polaron_type, &
niterPlrn, wfcelec_old, sigma_plrn, ethr_Plrn, full_diagon_plrn, &
mixing_Plrn, init_plrn_wf, nPlrn, nDOS_plrn, emax_plrn, emin_plrn, &
sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn
!!!!!
! sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn
sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn, &
!---------------------------------------------------------------------------------
! SH: Added for tc linearized equation, sparce sampling, and full-bandwidth runs
tc_linear, tc_linear_solver, gridsamp, griddens, fbw, dos_del, muchem
!!!!!
! --------------------------------------------------------------------------------
!
! amass : atomic masses
@ -253,10 +285,12 @@
! delta_qsmear: change in energy for each additional smearing in the a2f (units of meV)
! nqsmear : number of smearings used to calculate a2f
! nqstep : number of bins for frequency used to calculate a2f
! nswfc : nr. of grid points between (0,wsfc) in Eliashberg equations
! nswc : nr. of grid points between (wsfc,wscut)
! pwc : power used to define nswc for non-uniform grid real-axis calculations
! wsfc : intermediate freqeuncy used for integration in Eliashberg equations (at least 2-3 times wsphmax)
!!!!! these comment lines are deleted!
! ! nswfc : nr. of grid points between (0,wsfc) in Eliashberg equations
! ! nswc : nr. of grid points between (wsfc,wscut)
! ! pwc : power used to define nswc for non-uniform grid real-axis calculations
! ! wsfc : intermediate freqeuncy used for integration in Eliashberg equations (at least 2-3 times wsphmax)
!!!!!
! wscut : upper limit for frequency integration in Eliashberg equations (at least 5 times wsphmax) (units of eV)
! broyden_beta : mixing factor for broyden mixing
! broyden_ndim : number of iterations used in mixing scheme
@ -279,7 +313,9 @@
! Eliashberg equtions to real-axis
! liso : if .TRUE. solve isotropic case
! laniso : if .TRUE. solve anisotropic case
! lunif : if .TRUE. a uniform grid is defined between wsfc and wscut for real-axis calculations
!!!!! deleted comment line
! ! lunif : if .TRUE. a uniform grid is defined between wsfc and wscut for real-axis calculations
!!!!!
! kerwrite : if .TRUE. write kp and km to files .ker for real-axis calculations
! kerread : if .TRUE. read kp and km from files .ker for real-axis calculations
! imag_read : if .TRUE. read from files Delta and Znorm on the imaginary-axis
@ -294,7 +330,18 @@
! nw_specfun : nr. of bins for frequency in electron spectral function due to e-p interaction
! system_2d : if .TRUE. two-dimensional system (vaccum is in z-direction)
! delta_approx : if .TRUE. the double delta approximation is used to compute the phonon self-energy
!!!!! these comment lines are added
!
! Added by Samad Hajinazar
! tc_linear : if .TRUE. linearized Eliashberg eqn. for Tc will be solved
! tc_linear_solver : Algorithm to solve eigenvalue problem for Tc (default='power', 'lapack')
! gridsamp : Type of the Matsubara freq. sampling (-1=read from file;0=uniform;1=sparse)
! griddens : Measure of sparsity of the grid (default=1.d0, larger values give denser mesh)
! fbw : if .TRUE. full-bandwidth calculations will be performed
! dos_del : Delta_E in electronic dos for Fermi window (in eV)
! muchem : if .TRUE. chem. pot. is updated in fbw calculations
!
!!!!!
! Added by Carla Verdi & Samuel Pon\'e
! lpolar : if .TRUE. enable the correct Wannier interpolation in the case of polar material.
! lifc : if .TRUE. reads interatomic force constants produced by q2r.x for phonon interpolation
@ -505,7 +552,9 @@
delta_qsmear = 0.05d0 ! meV
degaussq = 0.05d0 ! meV
lreal = .FALSE.
lunif = .TRUE.
!!!!!
! lunif = .TRUE.
!!!!!
limag = .FALSE.
lpade = .FALSE.
lacon = .FALSE.
@ -527,12 +576,23 @@
ep_coupling = .TRUE.
tc_linear = .FALSE.
tc_linear_solver = 'power'
nswfc = 0
nswc = 0
!!!!!
! nswfc = 0
! nswc = 0
gridsamp = 0
griddens = 1.d0
fbw = .FALSE.
dos_del = 1.d-03
muchem = .FALSE.
!!!!!
nswi = 0
pwc = 1.d0
!!!!!
! pwc = 1.d0
!!!!!
wscut = 0.d0
wsfc = 0.5d0 * wscut
!!!!!
! wsfc = 0.5d0 * wscut
!!!!!
broyden_beta = 0.7d0
broyden_ndim = 8
conv_thr_raxis = 5.d-04
@ -655,8 +715,8 @@
ENDIF
IF (ios2 /= 0) CALL errore('epw_readin', 'Could not find namelist &inputepw', 2)
IF (ios /= 0) THEN
CALL errore('epw_readin', 'Bad line in namelist &inputepw'&
': "'//TRIM(line)//'" (error could be in the previous line)', 1)
CALL errore('epw_readin', 'Bad line in namelist &inputepw: "' &
& //TRIM(line)//'" (error could be in the previous line)', 1)
ENDIF
ios = close_input_file ( )
ENDIF ! meta_ionode

View File

@ -39,6 +39,10 @@
CHARACTER(LEN = 10) :: vme
!! if 'dipole' then computes the velocity as dipole+commutator = <\psi_mk|p+i[V_NL,r]|\psi_nk>
!! if 'wannier' then computes the velocity as dH_nmk/dk - i(e_nk-e_mk)A_nmk where A is the Berry connection
!!!!!
CHARACTER(LEN = 10) :: tc_linear_solver
!! algorithm to solve T_c eigenvalue problem
!!!!!
!
LOGICAL :: elecselfen
!! if .TRUE. calculate electron selfenergy due to e-p interaction
@ -142,8 +146,10 @@
!! if .TRUE. solve isotropic case
LOGICAL :: laniso
!! if .TRUE. solve anisotropic case
LOGICAL :: lunif
!! if .TRUE. a uniform grid is defined between wsfc and wc for real-axis calculations
!!!!!
! LOGICAL :: lunif
! !! if .TRUE. a uniform grid is defined between wsfc and wc for real-axis calculations
!!!!!
LOGICAL :: kerwrite
!! if .TRUE. write kp and km to files .ker for real-axis calculations
LOGICAL :: kerread
@ -153,9 +159,15 @@
LOGICAL :: eliashberg
!! if .TRUE. solve the Eliashberg equations
LOGICAL :: tc_linear
!! if .TRUE. linearized Eliashberg eqn. for T_c will be solved
CHARACTER(LEN = 10) :: tc_linear_solver
!! algorithm to solve T_c eigenvalue problem
!! if .TRUE. linearized Eliashberg eqn. for T_c will be solved
!!!!!
! CHARACTER(LEN = 10) :: tc_linear_solver
! !! algorithm to solve T_c eigenvalue problem
LOGICAL :: fbw
!! if .TRUE. full-bandwidth calculations will be performed
LOGICAL :: muchem
!! if .TURE. the chem. pot. is updated in fbw calculations
!!!!!
!
! Conductivity
LOGICAL :: scattering
@ -241,10 +253,6 @@
!! input temperature array (units of Kelvin)
!
! Superconductivity
INTEGER :: nswfc
!! nr. of grid points between (0,wsfc)
INTEGER :: nswc
!! nr. of grid points between (wsfc,wscut)
INTEGER :: nswi
!! nr. of grid points for Eliashberg equations of imaginary axis
INTEGER :: nsiter
@ -257,6 +265,10 @@
!! nr. of bins for frequency in electron spectral function due to e-p interaction
INTEGER :: restart_step
!! Create a restart point during the interpolation part every restart_step q/k-points.
!!!!!
INTEGER :: gridsamp
!! Type of the Matsubara freq. sampling (-1= read from file; 0= uniform; 1= sparse)
!!!!!
!
REAL(KIND = DP) :: degaussw
!! smearing width for Fermi surface average in e-ph coupling after wann interp
@ -304,10 +316,12 @@
!! change in energy for each additional smearing in the a2f
REAL(KIND = DP) :: muc
!! effective Coulomb potential in Eliashberg equations
REAL(KIND = DP) :: wsfc
!! intermediate freqeuncy between (0,wscut)
REAL(KIND = DP) :: pwc
!! power used to define a non-uniform grid between wsfc and wscut
!!!!!
! REAL(KIND = DP) :: wsfc
! !! intermediate freqeuncy between (0,wscut)
! REAL(KIND = DP) :: pwc
! !! power used to define a non-uniform grid between wsfc and wscut
!!!!!
REAL(KIND = DP) :: wscut
!! upper limit cutoff frequency in Eliashberg equations (at least 5 times wsphmax)
REAL(KIND = DP) :: broyden_beta
@ -329,6 +343,12 @@
!! min frequency in electron spectral function due to e-p interaction
REAL(KIND = DP) :: wmax_specfun
!! max frequency in electron spectral function due to e-p `interaction
!!!!!
REAL(KIND = DP) :: dos_del
!! Delta_E in electronic dos for Fermi window (in eV)
REAL(KIND = DP) :: griddens
!! Measure of sparsity of the grid
!!!!!
!
! Conductivity
INTEGER :: mob_nfreq

View File

@ -1463,18 +1463,12 @@
sa(:, :) = DBLE(s(:, :, nb))
xkf_rot = MATMUL(sa, xkf_tmp(:, ik + lower_bnd - 1))
!
DO i = 1, 3
IF (xkf_rot(1) < - eps8) xkf_rot(1) = xkf_rot(1) + 1.0d0
IF (xkf_rot(2) < - eps8) xkf_rot(2) = xkf_rot(2) + 1.0d0
IF (xkf_rot(3) < - eps8) xkf_rot(3) = xkf_rot(3) + 1.0d0
ENDDO
!
! Check that the point xkf_rot is part of the orginal xkf_in
found = .FALSE.
DO jk = 1, nkpt_bzfst
IF ((ABS(xkf_rot(1) - xkf_in(1, jk)) < eps8) .AND. &
(ABS(xkf_rot(2) - xkf_in(2, jk)) < eps8) .AND. &
(ABS(xkf_rot(3) - xkf_in(3, jk)) < eps8)) THEN
IF ((ABS(xkf_rot(1) - xkf_in(1, jk) - NINT(xkf_rot(1) - xkf_in(1, jk))) < eps8) .AND. &
(ABS(xkf_rot(2) - xkf_in(2, jk) - NINT(xkf_rot(2) - xkf_in(2, jk))) < eps8) .AND. &
(ABS(xkf_rot(3) - xkf_in(3, jk) - NINT(xkf_rot(3) - xkf_in(3, jk))) < eps8)) THEN
found = .TRUE.
EXIT
ENDIF

File diff suppressed because it is too large Load Diff

View File

@ -24,7 +24,10 @@
iua2ffil, iudosfil, iufillambda, iuqdos, iufe, iufilker, iuquad, &
iufilgap, iospectral_sup, iua2ftrfil, iufilgapFS, iufillambdaFS, &
iospectral_cum, iuwanep, iuwane, iunukk, iudvscf, iuqpeig, iures, &
iuint3paw
!!!!!
! iuint3paw
iuint3paw, iufildos, iufilmat
!!!!!
PUBLIC :: epwdata, iundmedata, iunvmedata, iunksdata, iudyn, iukgmap, iuepb, &
iufilfreq, iufilegnv, iufileph, iufilkqmap, iunpattern, iufilmu_q, &
iufilikmap, iueig, iunepmatwp, iunepmatwe, iunkf, iunqf, iufilFS, &
@ -73,6 +76,10 @@
INTEGER :: iudvscf = 80 ! Unit for the dvscf_q file
INTEGER :: iudyn = 81 ! Unit for the dynamical matrix file
INTEGER :: iufilkqmap = 82 ! Map of k+q
!!!!!
INTEGER :: iufilmat = 87 ! Matsubara indices
INTEGER :: iufildos = 88 ! electronic DOS in Fermi windows [prefix.dos]
!!!!!
INTEGER :: iukgmap = 96 ! Map of folding G-vector indexes [.kgmap]
INTEGER :: iuwanep = 97 ! Spatial decay of e-p matrix elements in wannier basis
! Electrons + phonons [epmat_wanep]

View File

@ -1062,7 +1062,10 @@
! Local variables
CHARACTER(LEN = 256) :: chunit
!! Unit name
INTEGER :: imelt
!!!!!
! INTEGER :: imelt
INTEGER(8) :: imelt
!!!!!
!! Size in number of elements
REAL(KIND = DP) :: rmelt
!! Size in byte
@ -1089,10 +1092,15 @@
!-----------------------------------------------------------------------
SUBROUTINE mem_size_eliashberg(vmelt, imelt)
!-----------------------------------------------------------------------
!
! This routine estimates the amount of memory taken up or
! released by different arrays
!
!!
!! This routine estimates the amount of memory taken up or
!! released by different arrays
!!!!! these comment lines are added
!!
!! SH: The "imelt" variable type is changed to INTEGER(8) throughout the
!! code to avoid issues with large Nr of k-points, etc (Nov 2021).
!!
!!!!!
USE io_global, ONLY : stdout
USE kinds, ONLY : DP
USE epwcom, ONLY : max_memlt
@ -1105,7 +1113,10 @@
!
INTEGER, INTENT(in) :: vmelt
!! 1 for integer variables and 2 for real variables
INTEGER, INTENT(in) :: imelt
!!!!!
! INTEGER, INTENT(in) :: imelt
INTEGER(8), INTENT(in) :: imelt
!!!!!
!! > 0 memory added or < 0 memory subtracted
!
REAL(KIND = DP) :: rmelt
@ -1156,7 +1167,10 @@
USE io_global, ONLY : stdout
USE epwcom, ONLY : max_memlt, nqstep
USE eliashbergcom, ONLY : nkfs, nbndfs, nsiw, nqfs, limag_fly, &
lacon_fly, memlt_pool
!!!!!
! lacon_fly, memlt_pool
lacon_fly, memlt_pool, wsn
!!!!!
USE mp_global, ONLY : inter_pool_comm, my_pool_id
USE mp, ONLY : mp_bcast, mp_barrier, mp_sum
USE division, ONLY : fkbounds
@ -1170,7 +1184,10 @@
!! calculation type
!
!Local variables
INTEGER :: imelt
!!!!!
! INTEGER :: imelt
INTEGER(8) :: imelt
!!!!!
!! size array
INTEGER :: lower_bnd, upper_bnd
!! Lower/upper bound index after k parallelization
@ -1192,7 +1209,12 @@
imelt = (upper_bnd - lower_bnd + 1) * MAXVAL(nqfs(:)) * nbndfs**2
IF (cname == 'imag') THEN
! get the size of the akeri that needa to be stored in each pool
imelt = imelt * (2 * nsiw(itemp))
!!!!! first line is changed, and 3rd and 4th lines are added
! imelt = imelt * (2 * nsiw(itemp))
!
! SH: This is adjusted to accommodate the sparse sampling case
imelt = imelt * 2 * (wsn(nsiw(itemp)) + 1)
!!!!!
ELSEIF (cname == 'acon') THEN
! get the size of a2fij that needs to be stored in each pool
imelt = imelt * nqstep

View File

@ -222,7 +222,6 @@ PWOBJS = \
OTHEROBJ = \
../../atomic/src/vxcgc.o \
../../upftools/interpolate.o \
../../flib/set_hubbard_l.o \
../../flib/w1gauss.o \
../../flib/wgauss.o

View File

@ -880,7 +880,10 @@
!! This routine print a header for superconductivity calculation
!!
USE io_global, ONLY : stdout
USE epwcom, ONLY : liso, laniso, lreal, imag_read, wscut
!!!!!
! USE epwcom, ONLY : liso, laniso, lreal, imag_read, wscut
USE epwcom, ONLY : liso, laniso, lreal, imag_read, wscut, fbw
!!!!!
USE elph2, ONLY : gtemp
USE eliashbergcom, ONLY : nsiw, nsw
USE constants_epw, ONLY : kelvin2eV
@ -897,12 +900,26 @@
WRITE(stdout, '(a)') ' '
WRITE(stdout, '(5x, a, i3, a, f12.5, a, a, i3, a)') 'temp(', itemp, ') = ', gtemp(itemp) / kelvin2eV, ' K'
WRITE(stdout, '(a)') ' '
IF (liso) &
!!!!!
!IF (liso) &
! WRITE(stdout, '(5x, a)') 'Solve isotropic Eliashberg equations on imaginary-axis'
!IF (laniso .AND. .NOT. imag_read) &
! WRITE(stdout, '(5x, a)') 'Solve anisotropic Eliashberg equations on imaginary-axis'
!IF (laniso .AND. imag_read) &
! WRITE(stdout, '(5x, a)') 'Read from file delta and znorm on imaginary-axis'
IF (liso .AND. .NOT. fbw) &
WRITE(stdout, '(5x, a)') 'Solve isotropic Eliashberg equations on imaginary-axis'
IF (laniso .AND. .NOT. imag_read) &
IF (liso .AND. fbw) &
WRITE(stdout, '(5x, a)') 'Solve full-bandwidth isotropic Eliashberg equations on imaginary-axis'
IF (laniso .AND. .NOT. fbw .AND. .NOT. imag_read) &
WRITE(stdout, '(5x, a)') 'Solve anisotropic Eliashberg equations on imaginary-axis'
IF (laniso .AND. imag_read) &
WRITE(stdout, '(5x, a)') 'Read from file delta and znorm on imaginary-axis '
IF (laniso .AND. fbw .AND. .NOT. imag_read) &
WRITE(stdout, '(5x, a)') 'Solve full-bandwidth anisotropic Eliashberg equations on imaginary-axis'
IF (laniso .AND. .NOT. fbw .AND. imag_read .AND. itemp == 1) &
WRITE(stdout, '(5x, a)') 'Read from file delta and znorm on imaginary-axis'
IF (laniso .AND. fbw .AND. imag_read .AND. itemp == 1) &
WRITE(stdout, '(5x, a)') 'Read from file delta and znorm and shift on imaginary-axis'
!!!!
WRITE(stdout, '(a)') ' '
WRITE(stdout, '(5x, a, i6, a, i6)') 'Total number of frequency points nsiw(', itemp, ') = ', nsiw(itemp)
WRITE(stdout, '(5x, a, f10.4)') 'Cutoff frequency wscut = ', (2.d0 * nsiw(itemp) + 1) * pi * gtemp(itemp)
@ -911,10 +928,20 @@
!
IF (cal_type == 2) THEN
WRITE(stdout, '(a)') ' '
IF (liso) &
WRITE(stdout, '(5x, a)') 'Pade approximant of isotropic Eliashberg equations from imaginary-axis to real-axis'
IF (laniso) &
WRITE(stdout, '(5x, a)') 'Pade approximant of anisotropic Eliashberg equations from imaginary-axis to real-axis'
!!!!!
!IF (liso) &
! WRITE(stdout, '(5x, a)') 'Pade approximant of isotropic Eliashberg equations from imaginary-axis to real-axis'
!IF (laniso) &
! WRITE(stdout, '(5x, a)') 'Pade approximant of anisotropic Eliashberg equations from imaginary-axis to real-axis'
IF (liso .AND. .NOT. fbw) WRITE(stdout, '(5x, a)') &
'Pade approximant of isotropic Eliashberg equations from imaginary-axis to real-axis'
IF (laniso .AND. .NOT. fbw) WRITE(stdout, '(5x, a)') &
'Pade approximant of anisotropic Eliashberg equations from imaginary-axis to real-axis'
IF (liso .AND. fbw) WRITE(stdout, '(5x, a)') &
'Pade approximant of full-bandwidth isotropic Eliashberg equations from imaginary-axis to real-axis'
IF (laniso .AND. fbw) WRITE(stdout, '(5x, a)') &
'Pade approximant of full-bandwidth anisotropic Eliashberg equations from imaginary-axis to real-axis'
!!!!!
WRITE(stdout, '(5x, a, f10.4)') 'Cutoff frequency wscut = ', wscut
WRITE(stdout, '(a)') ' '
ENDIF
@ -1484,6 +1511,10 @@
/ (carrier_density(itemp) * hbarJ)
mobb_bte(:, :, itemp) = (sigmab_bte(:, :, itemp) * electron_si * (bohr2ang * ang2cm)**2) &
/ (carrier_density(itemp) * hbarJ)
!
! To make the diagonal of mobb zero.
mobb_serta(:, :, itemp) = mobb_serta(:, :, itemp) - mob_serta(:, :, itemp)
mobb_bte(:, :, itemp) = mobb_bte(:, :, itemp) - mob_bte(:, :, itemp)
!
! Convert conductivity tensor in SI units [Siemens m^-1=Coulomb s^-1 V^-1 m^-d ]
! in 3d: cm^2 s^-1 V^-1 * (cm ^-2 cmtom^-1 C) = Coulomb s^-1 V^-1
@ -1522,7 +1553,7 @@
WRITE(stdout, '(4x,3E14.5,a,3E14.5)') mob_serta(:, 2, itemp), ' |', mobb_serta(:, 2, itemp)
WRITE(stdout, '(4x,3E14.5,a,3E14.5)') mob_serta(:, 3, itemp), ' |', mobb_serta(:, 3, itemp)
!
sigma_inv(:, :, itemp) = matinv3(sigma_serta(:, :, itemp))
!sigma_inv(:, :, itemp) = matinv3(sigma_serta(:, :, itemp))
IF (system_2d) THEN ! We suppose vacuum is in the z direction
mob_serta(3, 3, :) = 1d0
mob_inv(:, :, itemp) = matinv3(mob_serta(:, :, itemp))
@ -1530,7 +1561,7 @@
ELSE
mob_inv(:, :, itemp) = matinv3(mob_serta(:, :, itemp))
ENDIF
hall_serta(:, :, itemp) = MATMUL(MATMUL(mobb_serta(:, :, itemp), mob_inv(:, :, itemp)), &
hall_serta(:, :, itemp) = MATMUL(MATMUL(mob_inv(:, :, itemp), mobb_serta(:, :, itemp)), &
mob_inv(:, :, itemp)) / (b_norm * hbarJ ) * electron_si * (bohr2ang * ang2cm)**2
!
! bfield is energy*sec/lenght**2, mobility is in cm**2 V**-1 sec**-1.
@ -1563,7 +1594,7 @@
WRITE(stdout, '(4x,3E14.5,a,3E14.5)') mob_bte(:, 2, itemp), ' |', mobb_bte(:, 2, itemp)
WRITE(stdout, '(4x,3E14.5,a,3E14.5)') mob_bte(:, 3, itemp), ' |', mobb_bte(:, 3, itemp)
!
sigma_inv(:, :, itemp) = matinv3(sigma_bte(:, :, itemp))
!sigma_inv(:, :, itemp) = matinv3(sigma_bte(:, :, itemp))
IF (system_2d) THEN ! We suppose vacuum is in the z direction
mob_bte(3, 3, :) = 1d0
mob_inv(:, :, itemp) = matinv3(mob_bte(:, :, itemp))
@ -1571,7 +1602,7 @@
ELSE
mob_inv(:, :, itemp) = matinv3(mob_bte(:, :, itemp))
ENDIF
hall(:, :, itemp) = MATMUL(MATMUL(mobb_bte(:, :, itemp), mob_inv(:, :, itemp)), &
hall(:, :, itemp) = MATMUL(MATMUL(mob_inv(:, :, itemp), mobb_bte(:, :, itemp)), &
mob_inv(:, :, itemp)) / (b_norm * hbarJ ) * electron_si * (bohr2ang * ang2cm)**2
!
! bfield is energy*sec/lenght**2, mobility is in cm**2 V**-1 sec**-1.

View File

@ -18,10 +18,15 @@
IMPLICIT NONE
SAVE
INTEGER :: igmin(3)
INTEGER :: igmin(3), igmin_qG(3)
!!
REAL(KIND = DP) :: qqcut
!!
REAL(KIND = DP), PARAMETER :: alph = 1.d0
!! Ewald parameter, units (2pi/alat)^{2}
REAL(KIND = DP), PARAMETER :: gmax = 14.d0
!! Cutoff criteria for G-sum:
!! e^{ - (q+G) \cdot \epsilon \cdot (q+G) / (4 \alpha) } < e^{ - gmax } terms are neglected.
!
CONTAINS
!
@ -52,6 +57,71 @@
!--------------------------------------------------------------------------
!
!-----------------------------------------------------------------------
SUBROUTINE find_min_qG(q)
!-----------------------------------------------------------------------
!!
!! Find igmin_qG = min_{G}|q+G|
!! to shift G-sum in long-range g
!! in order to ensure periodicity g(q+G')=g(q)
!!
USE constants_epw, ONLY : eps8
USE cell_base, ONLY : bg, at
USE io_global, ONLY : stdout
!
IMPLICIT NONE
!
REAL(KIND = DP), INTENT(in) :: q(3)
!! q-vector from the full coarse or fine grid, in crystal coords.
!
! Local variables
INTEGER :: m1
!
INTEGER :: m2
!
INTEGER :: m3
!! Loop over G-vectors
REAL(KIND = DP) :: g1
!!
REAL(KIND = DP) :: g2
!!
REAL(KIND = DP) :: g3
!! G-vector in cartesian coords.
REAL(KIND = DP) :: qq
!! |q+G|
REAL(KIND = DP) :: qqmin
!! min|q+G|
REAL(KIND = DP) :: qtmp(3)
!!
!
! Move q to 1BZ
qtmp(:) = q(:) - INT(q(:))
! Transform to cartesian coords
CALL cryst_to_cart(1, qtmp, bg, 1)
!
igmin_qG = 0
qqmin = 1E10
DO m1 = -2, 2
DO m2 = -2, 2
DO m3 = -2, 2
g1 = m1 * bg(1, 1) + m2 * bg(1, 2) + m3 * bg(1, 3) + qtmp(1)
g2 = m1 * bg(2, 1) + m2 * bg(2, 2) + m3 * bg(2, 3) + qtmp(2)
g3 = m1 * bg(3, 1) + m2 * bg(3, 2) + m3 * bg(3, 3) + qtmp(3)
qq = g1 * g1 + g2 * g2 + g3 * g3
IF (qqmin > qq) THEN
qqmin = qq
igmin_qG(1) = m1 - INT(q(1))
igmin_qG(2) = m2 - INT(q(2))
igmin_qG(3) = m3 - INT(q(3))
ENDIF
ENDDO
ENDDO
ENDDO
!
!--------------------------------------------------------------------------
END SUBROUTINE find_min_qG
!--------------------------------------------------------------------------
!
!-----------------------------------------------------------------------
SUBROUTINE find_gmin(q)
!-----------------------------------------------------------------------
!!
@ -138,7 +208,7 @@
!!
USE kinds, ONLY : DP
USE constants_epw, ONLY : pi, fpi, e2
USE cell_base, ONLY : bg, omega, alat
USE cell_base, ONLY : bg, omega, alat, at
USE constants_epw, ONLY : eps6, ci, zero, czero, twopi, eps8
USE io_global, ONLY : ionode_id
USE mp_world, ONLY : mpime
@ -200,8 +270,8 @@
!! (2*pi/a)^2
REAL(KIND = DP):: geg
!! <q+G| epsil | q+G>
REAL(KIND = DP) :: alph
!! Ewald parameter
!REAL(KIND = DP) :: alph
!!! Ewald parameter
REAL(KIND = DP) :: fac
!! General prefactor
REAL(KIND = DP) :: gg(3)
@ -210,8 +280,8 @@
!! fac * EXP(-geg / (alph * 4.0d0)) / geg
REAL(KIND = DP) :: arg
!! Argument of the exponential
REAL(KIND = DP) :: gmax
!! Maximum G
!REAL(KIND = DP) :: gmax
!!! Maximum G
REAL(KIND = DP) :: zag(3)
!! Z * G
REAL(KIND = DP) :: qag(3)
@ -222,6 +292,8 @@
!! Q * G
REAL(KIND = DP) :: c
!! vacuum size (supercell length along the z direction) in case of 2D
REAL(KIND = DP) :: qtmp(3)
!! Temporary q vector to find min_{G}|q+G|
COMPLEX(KIND = DP) :: fnat(3)
!! Z with \delta_kk' summed
COMPLEX(KIND = DP) :: qnat(3)
@ -274,8 +346,8 @@
fac = (signe * e2 * fpi) / omega
ENDIF
!
gmax = 14.d0
alph = 1.0d0
!gmax = 14.d0
!alph = 1.0d0
geg = gmax * alph * 4.0d0
!
! Estimate of nr1x,nr2x,nr3x generating all vectors up to G^2 < geg
@ -315,6 +387,11 @@
add = 0
ENDIF
!
!JLB: Find igmin_qG = min_{G}|q+G|
qtmp=q
CALL cryst_to_cart(1, qtmp, at, -1)
CALL find_min_qG(qtmp)
!
dyn_tmp(:, :) = czero
!
! DO mm = 1, mmax
@ -412,9 +489,15 @@
ENDIF ! geg
!
! Case q =/ 0
gg(1) = gg(1) + q(1) * (twopi / alat)
gg(2) = gg(2) + q(2) * (twopi / alat)
gg(3) = gg(3) + q(3) * (twopi / alat)
!gg(1) = gg(1) + q(1) * (twopi / alat)
!gg(2) = gg(2) + q(2) * (twopi / alat)
!gg(3) = gg(3) + q(3) * (twopi / alat)
!
!JLB: shift G-sum and center around min_{G}|q+G|, to ensure periodicity
gg(1) = ( (m1+igmin_qG(1)) * bg(1, 1) + (m2+igmin_qG(2)) * bg(1, 2) + (m3+igmin_qG(3)) * bg(1,3) + q(1)) * (twopi / alat)
gg(2) = ( (m1+igmin_qG(1)) * bg(2, 1) + (m2+igmin_qG(2)) * bg(2, 2) + (m3+igmin_qG(3)) * bg(2,3) + q(2)) * (twopi / alat)
gg(3) = ( (m1+igmin_qG(1)) * bg(3, 1) + (m2+igmin_qG(2)) * bg(3, 2) + (m3+igmin_qG(3)) * bg(3,3) + q(3)) * (twopi / alat)
!
!
IF (system_2d) THEN
geg = gg(1)**2 + gg(2)**2 + gg(3)**2
@ -523,7 +606,7 @@
!! \left[ U_{{\bf k}+{\bf q}}\:U_{{\bf k}}^{\dagger} \right]_{mn} $$
!!
USE kinds, ONLY : dp
USE cell_base, ONLY : bg, omega, alat
USE cell_base, ONLY : bg, omega, alat, at
USE ions_base, ONLY : tau, nat
USE constants_epw, ONLY : twopi, fpi, e2, ci, czero, eps12, zero, eps8
USE epwcom, ONLY : shortrange, lpolar, system_2d
@ -566,6 +649,10 @@
!! Loop over q-points
INTEGER :: nr1x, nr2x, nr3x
!! Minimum supercell size to include all vector such that G^2 < geg
INTEGER :: mmin(3), mmax(3)
!! Shifted G-loop to be centered around min_{G}|q+G|
INTEGER :: nGtest
!! Number of G-vectors within cutoff (for testing purposes only)
REAL(KIND = DP):: metric
!! (2*pi/a)^2
REAL(KIND = DP) :: qeq
@ -576,10 +663,10 @@
!! Z^* \cdot (q+g)
REAL(KIND = DP) :: gg(3)
!! G-vector
REAL(KIND = DP) :: gmax
!! Max G-vector
REAL(KIND = DP) :: alph
!! Ewald factor (arbitrary, here chosen to be 1)
!REAL(KIND = DP) :: gmax
!!! Max G-vector
!REAL(KIND = DP) :: alph
!!! Ewald factor (arbitrary, here chosen to be 1)
REAL(KIND = DP) :: geg
!! <q+G| epsil | q+G>
REAL(KIND = DP) :: Qqq
@ -590,6 +677,8 @@
!! G-vector * reff * G-vector
REAL(KIND = DP) :: c
!! vacuum size (supercell length along the z direction) in case of 2D
REAL(KIND = DP) :: qtmp(3)
!! Temporary q vector to find min_{G}|q+G|
COMPLEX(KIND = DP) :: fac
!! General prefactor
COMPLEX(KIND = DP) :: facqd
@ -622,8 +711,8 @@
fac = (signe * e2 * fpi * ci) / omega
ENDIF
!
gmax = 14.d0
alph = 1.0d0
!gmax = 14.d0
!alph = 1.0d0
metric = (twopi / alat)**2
geg = gmax * alph * 4.0d0
!
@ -645,10 +734,27 @@
nr3x = INT(SQRT(geg) / SQRT(bg(1, 3)**2 + bg(2, 3)**2 + bg(3, 3)**2)) + 1
ENDIF
!
!JLB: Find igmin_qG = min_{G}|q+G|
qtmp=q
CALL cryst_to_cart(1, qtmp, at, -1)
CALL find_min_qG(qtmp)
! shift G-sum and center around min_{G}|q+G|, to ensure periodicity
mmin(1) = -nr1x + igmin_qG(1)
mmax(1) = nr1x + igmin_qG(1)
mmin(2) = -nr2x + igmin_qG(2)
mmax(2) = nr2x + igmin_qG(2)
mmin(3) = -nr3x + igmin_qG(3)
mmax(3) = nr3x + igmin_qG(3)
!
epmatl(:) = czero
DO m1 = -nr1x, nr1x
DO m2 = -nr2x, nr2x
DO m3 = -nr3x, nr3x
!DO m1 = -nr1x, nr1x
! DO m2 = -nr2x, nr2x
! DO m3 = -nr3x, nr3x
!
DO m1 = mmin(1), mmax(1)
DO m2 = mmin(2), mmax(2)
DO m3 = mmin(3), mmax(3)
!
gg(1) = (m1 * bg(1, 1) + m2 * bg(1, 2) + m3 * bg(1, 3) + q(1)) * (twopi / alat)
gg(2) = (m1 * bg(2, 1) + m2 * bg(2, 2) + m3 * bg(2, 3) + q(2)) * (twopi / alat)
gg(3) = (m1 * bg(3, 1) + m2 * bg(3, 2) + m3 * bg(3, 3) + q(3)) * (twopi / alat)
@ -671,8 +777,9 @@
IF (system_2d) THEN
facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / (SQRT(qeq) * (1.0 + grg * SQRT(qeq)))
ELSE
! facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / qeq <-- this is correct
facqd = fac * EXP(-qeq * DSQRT(metric) / (metric * alph * 4.0d0)) / qeq ! <-- this is to keep as previous
facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / qeq !<-- this is correct
!facqd = fac * EXP(-qeq * DSQRT(metric) / (metric * alph * 4.0d0)) / qeq ! <-- this is to keep as previous
!facqd = fac / qeq ! no Ewald
ENDIF
!
DO na = 1, nat
@ -748,7 +855,7 @@
!! 10/2016 - SP: Optimization
!!
USE kinds, ONLY : DP
USE cell_base, ONLY : bg, omega, alat
USE cell_base, ONLY : bg, omega, alat, at
USE ions_base, ONLY : tau, nat
USE constants_epw, ONLY : twopi, fpi, e2, ci, czero, eps12, zero, eps8
USE epwcom, ONLY : shortrange, nbndsub, lpolar, system_2d
@ -792,6 +899,10 @@
!! Mode index
INTEGER :: nr1x, nr2x, nr3x
!! Minimum supercell size to include all vector such that G^2 < geg
INTEGER :: mmin(3), mmax(3)
!! Shifted G-loop to be centered around min_{G}|q+G|
INTEGER :: nGtest
!! Number of G-vectors within cutoff (for testing purposes only)
REAL(KIND = DP):: metric
!! (2*pi/a)^2
REAL(KIND = DP) :: qeq
@ -802,10 +913,10 @@
!! Z^* \cdot (q+g)
REAL(KIND = DP) :: gg(3)
!! G-vector
REAL(KIND = DP) :: gmax
!! Max G-vector
REAL(KIND = DP) :: alph
!! Ewald factor (arbitrary, here chosen to be 1)
!REAL(KIND = DP) :: gmax
!!! Max G-vector
!REAL(KIND = DP) :: alph
!!! Ewald factor (arbitrary, here chosen to be 1)
REAL(KIND = DP) :: geg
!! <G| epsil | G>
REAL(KIND = DP) :: reff(2, 2)
@ -816,6 +927,8 @@
!! In the case of Si, its a single value
REAL(KIND = DP) :: c
!! vacuum size (supercell length along the z direction) in case of 2D
REAL(KIND = DP) :: qtmp(3)
!! Temporary q vector to find min_{G}|q+G|
COMPLEX(KIND = DP) :: fac
!! General prefactor
COMPLEX(KIND = DP) :: facqd
@ -848,8 +961,8 @@
fac = (signe * e2 * fpi * ci) / omega
ENDIF
!
gmax = 14.d0
alph = 1.0d0
!gmax = 14.d0
!alph = 1.0d0
metric = (twopi / alat)**2
geg = gmax * alph * 4.0d0
!
@ -871,10 +984,26 @@
nr3x = INT(SQRT(geg) / SQRT(bg(1, 3)**2 + bg(2, 3)**2 + bg(3, 3)**2)) + 1
ENDIF
!
!JLB: Find igmin_qG = min_{G}|q+G|
qtmp=q
CALL cryst_to_cart(1, qtmp, at, -1)
CALL find_min_qG(qtmp)
! shift G-sum and center around min_{G}|q+G|, to ensure periodicity
mmin(1) = -nr1x + igmin_qG(1)
mmax(1) = nr1x + igmin_qG(1)
mmin(2) = -nr2x + igmin_qG(2)
mmax(2) = nr2x + igmin_qG(2)
mmin(3) = -nr3x + igmin_qG(3)
mmax(3) = nr3x + igmin_qG(3)
!
!nGtest = 0
epmatl(:, :, :) = czero
DO m1 = -nr1x, nr1x
DO m2 = -nr2x, nr2x
DO m3 = -nr3x, nr3x
!DO m1 = -nr1x, nr1x
! DO m2 = -nr2x, nr2x
! DO m3 = -nr3x, nr3x
DO m1 = mmin(1), mmax(1)
DO m2 = mmin(2), mmax(2)
DO m3 = mmin(3), mmax(3)
!
gg(1) = (m1 * bg(1, 1) + m2 * bg(1, 2) + m3 * bg(1, 3) + q(1)) * (twopi / alat)
gg(2) = (m1 * bg(2, 1) + m2 * bg(2, 2) + m3 * bg(2, 3) + q(2)) * (twopi / alat)
@ -896,12 +1025,15 @@
ENDIF
!
IF (qeq > 0.0d0 .AND. qeq / (metric * alph * 4.0d0) < gmax) THEN
!
!nGtest = nGtest + 1
!
IF (system_2d) THEN
facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / (SQRT(qeq) * (1.0 + grg * SQRT(qeq)))
ELSE
! facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / qeq <-- this is correct
facqd = fac * EXP(-qeq * DSQRT(metric) / (metric * alph * 4.0d0)) / qeq ! <-- this is to keep as previous
facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / qeq !<-- this is correct
!facqd = fac * EXP(-qeq * DSQRT(metric) / (metric * alph * 4.0d0)) / qeq ! <-- this is to keep as previous
!facqd = fac / qeq ! no Ewald
ENDIF
!
DO na = 1, nat
@ -936,6 +1068,8 @@
ENDDO ! m2
ENDDO ! m1
!
!write(*,*) "Number of G-vectors within cutoff:", nGtest
!
! In case we want only the short-range we do
! g_s = DSQRT(g*g - g_l*g_l)
!
@ -1691,8 +1825,8 @@
!! (2*pi/a)^2
REAL(KIND = DP):: geg
!! <q+G| epsil | q+G>
REAL(KIND = DP) :: alph
!! Ewald parameter
!REAL(KIND = DP) :: alph
!!! Ewald parameter
REAL(KIND = DP) :: fac
!! General prefactor
REAL(KIND = DP) :: gg(3)
@ -1701,8 +1835,8 @@
!! fac * EXP(-geg / (alph * 4.0d0)) / geg
REAL(KIND = DP) :: arg
!! Argument of the exponential
REAL(KIND = DP) :: gmax
!! Maximum G
!REAL(KIND = DP) :: gmax
!!! Maximum G
REAL(KIND = DP) :: arg_no_g(3)
!! Difference of atomic position
REAL(KIND = DP) :: zag(3)
@ -1721,15 +1855,15 @@
! very rough estimate: geg/4/alph > gmax = 14
! (exp (-14) = 10^-6)
!
gmax = 14.d0
alph = 1.0d0
!gmax = 14.d0
!alph = 1.0d0
metric = (twopi / alat)**2
geg = gmax * alph * 4.0d0
!
IF (ABS(ABS(signe) - 1.0) > eps6) CALL errore('rgd_blk_der', ' wrong value for signe ', 1)
!
gmax = 14.d0
alph = 1.0d0
!gmax = 14.d0
!alph = 1.0d0
geg = gmax * alph * 4.0d0
fac = signe * e2 * fpi / omega
!
@ -1887,10 +2021,10 @@
!! Z^* \cdot (q+g)
REAL(KIND = DP) :: gg(3)
!! G-vector
REAL(KIND = DP) :: gmax
!! Max G-vector
REAL(KIND = DP) :: alph
!! Ewald factor (arbitrary, here chosen to be 1)
!REAL(KIND = DP) :: gmax
!!! Max G-vector
!REAL(KIND = DP) :: alph
!!! Ewald factor (arbitrary, here chosen to be 1)
REAL(KIND = DP) :: geg
!! <G| epsil | G>
REAL(KIND = DP) :: reff(2, 2)
@ -1933,8 +2067,8 @@
fac = (signe * e2 * fpi * ci) / omega
ENDIF
!
gmax = 14.d0
alph = 1.0d0
!gmax = 14.d0
!alph = 1.0d0
metric = (twopi / alat)**2
geg = gmax * alph * 4.0d0
!
@ -1985,8 +2119,8 @@
IF (system_2d) THEN
facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / (SQRT(qeq) * (1.0 + grg * SQRT(qeq)))
ELSE
! facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / qeq <-- this is correct
facqd = fac * EXP(-qeq * DSQRT(metric) / (metric * alph * 4.0d0)) / qeq ! <-- this is to keep as previous
facqd = fac * EXP(-qeq / (metric * alph * 4.0d0)) / qeq !<-- this is correct
!facqd = fac * EXP(-qeq * DSQRT(metric) / (metric * alph * 4.0d0)) / qeq ! <-- this is to keep as previous
ENDIF
!
DO na = 1, nat

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -16,6 +16,88 @@
!
CONTAINS
!
!!!!!
!-----------------------------------------------------------------------
SUBROUTINE mix_wrap(ndim, deltaout, deltain, alphamix, iter, n_iter, conv, df, dv)
!-----------------------------------------------------------------------
!!
!! SH: Wrapper for the linear/broyden mixings (Nov 2021).
!! Note: the linear mixing option is implemented for
!! benchmarking/development purposes and can be invoked by
!! setting the broyden_beta parameter to a negative value.
!!
!
USE kinds, ONLY : DP
USE constants_epw, ONLY : zero
!
IMPLICIT NONE
!
LOGICAL, INTENT(in) :: conv
!! If true convergence reached
!
INTEGER, INTENT(in) :: ndim
!! Dimension of arrays deltaout, deltain
INTEGER, INTENT(in) :: iter
!! Current iteration number
INTEGER, INTENT(in) :: n_iter
!! Number of iterations used in the mixing
!
REAL(KIND = DP), INTENT(in) :: alphamix
!! Mixing factor (0 < alphamix <= 1)
REAL(KIND = DP), INTENT(inout) :: deltaout(ndim)
!! output delta at current iteration
REAL(KIND = DP), INTENT(inout) :: deltain(ndim)
!! delta at previous iteration
REAL(KIND = DP), INTENT(inout) :: df(ndim, n_iter)
!! arrays containing info from previous iterations
REAL(KIND = DP), INTENT(inout) :: dv(ndim, n_iter)
!! arrays containing info from previous iterations
!
IF (alphamix < zero ) THEN
CALL mix_linear(ndim, deltaout, deltain, alphamix)
ELSE
CALL mix_broyden(ndim, deltaout, deltain, alphamix, iter, n_iter, conv, df, dv)
ENDIF
!
RETURN
!
!-----------------------------------------------------------------------
END SUBROUTINE mix_wrap
!-----------------------------------------------------------------------
!
!-----------------------------------------------------------------------
SUBROUTINE mix_linear(ndim, arout, arin, mixf)
!-----------------------------------------------------------------------
!!
!! SH: Simple linear mixing for gap, normalization, shift, etc (Nov 2021).
!!
!
USE kinds, ONLY : DP
!
IMPLICIT NONE
!
INTEGER, INTENT(in) :: ndim
!! Dimension of arrays deltaout, deltain
!
REAL(KIND = DP), INTENT(in) :: mixf
!! Mixing factor (0 < alphamix <= 1)
REAL(KIND = DP), INTENT(inout) :: arout(ndim)
!! output delta at current iteration
REAL(KIND = DP), INTENT(inout) :: arin(ndim)
!! delta at previous iteration
!
! Local variables
INTEGER :: i
!
DO i = 1, ndim
arin(i) = DABS(mixf) * arin(i) + (1.d0 - DABS(mixf)) * arout(i)
ENDDO
!
!-----------------------------------------------------------------------
END SUBROUTINE mix_linear
!-----------------------------------------------------------------------
!
!!!!!
!-----------------------------------------------------------------------
SUBROUTINE mix_broyden(ndim, deltaout, deltain, alphamix, iter, n_iter, conv, df, dv)
!-----------------------------------------------------------------------

View File

@ -1868,7 +1868,7 @@
na = MOD(irn - 1, nat) + 1
!
DO iw = 1, dims
CALL ZAXPY(nrr_k * 3, cfac(iw, na, ir), epmatwp(iw, :, :, 3 * (na - 1) + 1:3 * na, ir), 1, &
CALL ZAXPY(nbnd * nrr_k * 3, cfac(iw, na, ir), epmatwp(iw, :, :, 3 * (na - 1) + 1:3 * na, ir), 1, &
eptmp(iw, :, :, 3 * (na - 1) + 1:3 * na), 1)
ENDDO
ENDDO

View File

@ -1,8 +1,8 @@
###########################################################
# FFT
# External FFT library that FFTXLIB depends on
# The following targets will be defined:
add_library(qe_fft INTERFACE)
qe_install_targets(qe_fft)
add_library(qe_ext_fft INTERFACE)
qe_install_targets(qe_ext_fft)
###########################################################
SET(BLA_VENDOR_SAVED ${BLA_VENDOR})
if(QE_FFTW_VENDOR STREQUAL "AUTO")
@ -11,22 +11,21 @@ if(QE_FFTW_VENDOR STREQUAL "AUTO")
if(VendorFFTW_FOUND)
if(VendorFFTW_ID STREQUAL "Intel")
qe_add_global_compile_definitions(__DFTI)
set(qe_fft_wrappers fft_scalar.DFTI.f90)
set(qe_ext_fft_wrappers fft_scalar.DFTI.f90)
elseif(VendorFFTW_ID STREQUAL "Arm")
## ARMPL implements the same interface of the standard FFTW
## no need of QE ARMPL wrapper
#qe_add_global_compile_definitions(__ARM_LIB)
#set(qe_fft_wrappers fft_scalar.ARM_LIB.f90)
#set(qe_ext_fft_wrappers fft_scalar.ARM_LIB.f90)
qe_add_global_compile_definitions(__FFTW3)
set(qe_fft_wrappers fft_scalar.FFTW3.f90)
set(qe_ext_fft_wrappers fft_scalar.FFTW3.f90)
elseif(VendorFFTW_ID STREQUAL "IBMESSL")
qe_add_global_compile_definitions(__LINUX_ESSL)
set(qe_fft_wrappers fft_scalar.ESSL.f90)
set(qe_ext_fft_wrappers fft_scalar.ESSL.f90)
endif()
target_link_libraries(qe_fft INTERFACE VendorFFTW)
target_link_libraries(qe_ext_fft INTERFACE VendorFFTW)
else()
# Try to find the official FFTW3
message(STATUS "No vendor FFTW found")
if(QE_ENABLE_OPENMP)
find_package(FFTW3 COMPONENTS DOUBLE_OPENMP DOUBLE)
else()
@ -34,8 +33,8 @@ if(QE_FFTW_VENDOR STREQUAL "AUTO")
endif()
if(FFTW3_FOUND)
qe_add_global_compile_definitions(__FFTW3)
set(qe_fft_wrappers fft_scalar.FFTW3.f90)
target_link_libraries(qe_fft INTERFACE FFTW3)
set(qe_ext_fft_wrappers fft_scalar.FFTW3.f90)
target_link_libraries(qe_ext_fft INTERFACE FFTW3)
else()
message(STATUS "CMake variable FFTW3_ROOT may be used to hint the root directory of FFTW3 installation.")
# Cannot find anything useful, just point out internal FFTW fallback.
@ -48,10 +47,10 @@ if(QE_FFTW_VENDOR STREQUAL "AUTO")
elseif(QE_FFTW_VENDOR MATCHES "Intel")
if(QE_FFTW_VENDOR STREQUAL "Intel_DFTI")
qe_add_global_compile_definitions(__DFTI)
set(qe_fft_wrappers fft_scalar.DFTI.f90)
set(qe_ext_fft_wrappers fft_scalar.DFTI.f90)
elseif(QE_FFTW_VENDOR STREQUAL "Intel_FFTW3")
qe_add_global_compile_definitions(__FFTW3)
set(qe_fft_wrappers fft_scalar.FFTW3.f90)
set(qe_ext_fft_wrappers fft_scalar.FFTW3.f90)
else()
message(FATAL_ERROR "The unknown Intel FFTW library '${QE_FFTW_VENDOR}' is not supported!")
endif()
@ -63,7 +62,7 @@ elseif(QE_FFTW_VENDOR MATCHES "Intel")
find_package(LAPACK QUIET)
find_package(VendorFFTW REQUIRED COMPONENTS MKL)
if(VendorFFTW_FOUND)
target_link_libraries(qe_fft INTERFACE VendorFFTW)
target_link_libraries(qe_ext_fft INTERFACE VendorFFTW)
message(STATUS "Found ${QE_FFTW_VENDOR} library")
else()
message(FATAL_ERROR "Failed to find ${QE_FFTW_VENDOR} library. "
@ -79,8 +78,8 @@ elseif(QE_FFTW_VENDOR STREQUAL "ArmPL")
find_package(VendorFFTW REQUIRED COMPONENTS ArmPL)
if(VendorFFTW_FOUND)
qe_add_global_compile_definitions(__FFTW3)
set(qe_fft_wrappers fft_scalar.FFTW3.f90)
target_link_libraries(qe_fft INTERFACE VendorFFTW)
set(qe_ext_fft_wrappers fft_scalar.FFTW3.f90)
target_link_libraries(qe_ext_fft INTERFACE VendorFFTW)
message(STATUS "Found ${QE_FFTW_VENDOR} library")
else()
message(FATAL_ERROR "Failed to find ${QE_FFTW_VENDOR} library. "
@ -92,8 +91,8 @@ elseif(QE_FFTW_VENDOR STREQUAL "IBMESSL")
find_package(VendorFFTW REQUIRED COMPONENTS IBMESSL)
if(VendorFFTW_FOUND)
qe_add_global_compile_definitions(__LINUX_ESSL)
set(qe_fft_wrappers fft_scalar.ESSL.f90)
target_link_libraries(qe_fft INTERFACE VendorFFTW)
set(qe_ext_fft_wrappers fft_scalar.ESSL.f90)
target_link_libraries(qe_ext_fft INTERFACE VendorFFTW)
message(STATUS "Found ${QE_FFTW_VENDOR} library")
else()
message(FATAL_ERROR "Failed to find ${QE_FFTW_VENDOR} library. "
@ -107,8 +106,8 @@ elseif(QE_FFTW_VENDOR STREQUAL "FFTW3")
endif()
if(FFTW3_FOUND)
qe_add_global_compile_definitions(__FFTW3)
set(qe_fft_wrappers fft_scalar.FFTW3.f90)
target_link_libraries(qe_fft INTERFACE FFTW3)
set(qe_ext_fft_wrappers fft_scalar.FFTW3.f90)
target_link_libraries(qe_ext_fft INTERFACE FFTW3)
message(STATUS "Found FFTW3 library")
else()
message(FATAL_ERROR "Failed to find ${QE_FFTW_VENDOR} library. "
@ -117,87 +116,19 @@ elseif(QE_FFTW_VENDOR STREQUAL "FFTW3")
elseif(QE_FFTW_VENDOR STREQUAL "Internal")
message(STATUS "QE internal implementation of FFTW (FFTXLib)")
qe_add_global_compile_definitions(__FFTW)
set(qe_fft_wrappers fft_scalar.FFTW.f90)
set(qe_ext_fft_wrappers fft_scalar.FFTW.f90)
else()
message(FATAL_ERROR "The FFTW vendor library '${QE_FFTW_VENDOR}' is not supported!")
endif()
SET(BLA_VENDOR ${BLA_VENDOR_SAVED})
set(f_src_fftx
fft_scatter.f90
fft_scatter_2d.f90
scatter_mod.f90
fft_ggen.f90
fft_fwinv.f90
fft_scalar.f90
fftw_interfaces.f90
fft_parallel.f90
fft_parallel_2d.f90
fft_interfaces.f90
fft_interpolate.f90
stick_base.f90
fft_smallbox.f90
fft_smallbox_type.f90
fft_support.f90
fft_error.f90
fft_types.f90
tg_gather.f90
fft_helper_subroutines.f90
fft_param.f90)
if(QE_ENABLE_CUDA)
set(f_src_fftx
${f_src_fftx}
fft_scalar.cuFFT.f90
fft_buffers.f90
fft_scatter_gpu.f90
fft_scatter_2d_gpu.f90)
endif()
qe_enable_cuda_fortran("${f_src_fftx}")
set(c_src_fftx
fft_stick.c
fftw.c
fftw_sp.c
fftw_dp.c)
qe_add_library(qe_fftx
${f_src_fftx}
${c_src_fftx}
${qe_fft_wrappers})
target_link_libraries(qe_fftx
PRIVATE
qe_fft
qe_openmp_fortran
qe_mpi_fortran)
if(QE_ENABLE_CUDA)
target_link_libraries(qe_fftx
PRIVATE
CUDA::cufft)
endif()
qe_install_targets(qe_fftx)
add_subdirectory(src)
###########################################################
# Tests
# TODO move all tests to a proper location
###########################################################
if(QE_ENABLE_TEST)
set(src_fftx_test fft_test.f90)
qe_add_executable(qe_fftx_test ${src_fftx_test})
set_target_properties(qe_fftx_test
PROPERTIES
OUTPUT_NAME qe_fftx_test.x
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
target_link_libraries(qe_fftx_test
PRIVATE
qe_openmp_fortran
qe_mpi_fortran
qe_fftx)
add_unit_test(test_qe_fftx-r1-t1 1 1 $<TARGET_FILE:qe_fftx_test>)
add_unit_test(test_qe_fftx-r1-t3 1 3 $<TARGET_FILE:qe_fftx_test>)
add_unit_test(test_qe_fftx-r3-t1 3 1 $<TARGET_FILE:qe_fftx_test>)
add_unit_test(test_qe_fftx-r3-t2 3 2 $<TARGET_FILE:qe_fftx_test>)
add_subdirectory(tests)
endif()

View File

@ -1,18 +0,0 @@
project: FFTXlib
project_dir: .
output_dir: ./Doc
predocmark: >
docmark_alt: #
predocmark_alt: <
display: public
private
exclude: fft_scalar.DFTI.f90
fft_scalar.ESSL.f90
fft_scalar.FFTW3.f90
fft_scalar.FFTW.f90
fft_scalar.SX6.f90
mpif.h
include: /cineca/prod/compilers/intel/cs-xe-2015/binary/impi_5.0.2/include64/
graph: true
A self-contained library for handling FFT in QE

View File

@ -1,66 +1,16 @@
# Makefile for FFTXlib
# Makefile for FFTXLIB
sinclude ../make.inc
include ../make.inc
default: all
FFTX = \
scatter_mod.o \
fft_scatter_2d.o \
fft_scatter_gpu.o \
fft_scatter_2d_gpu.o \
fft_ggen.o \
fft_fwinv.o \
fft_scalar.o \
fft_scalar.DFTI.o \
fft_scalar.ESSL.o \
fft_scalar.FFTW.o \
fft_scatter.o \
fft_scalar.cuFFT.o \
fftw_interfaces.o \
fft_scalar.FFTW3.o \
fft_scalar.SX6.o \
fft_parallel.o \
fft_parallel_2d.o \
fft_interfaces.o \
fft_interpolate.o \
stick_base.o \
fftw.o \
fftw_sp.o \
fftw_dp.o \
fft_smallbox.o \
fft_smallbox_type.o \
fft_support.o \
fft_error.o \
fft_stick.o \
fft_types.o \
tg_gather.o \
fft_helper_subroutines.o \
fft_param.o \
fft_buffers.o
all: fftx
fftx:
( cd src ; $(MAKE) all || exit 1 )
all : libqefft.a
fftx_clean:
( cd src ; $(MAKE) clean )
libqefft.a: $(FFTX)
$(AR) $(ARFLAGS) $@ $?
$(RANLIB) $@
clean: fftx_clean
fft_scalar.o : fft_scalar.f90 fft_scalar.FFTW3.f90 fft_scalar.FFTW.f90 fft_scalar.SX6.f90 fft_scalar.DFTI.f90 fft_scalar.ESSL.f90
fft_stick.o : fft_stick.c fftw_sp.h fftw_dp.h
fftw.o : fftw.c fftw.h
fftw_sp.o : fftw_sp.c fftw_sp.h fftw.h konst.h
fftw_dp.o : fftw_dp.c fftw_dp.h fftw.h konst.h
test : fft_test.o libqefft.a
$(LD) $(LDFLAGS) -o fft_test.x fft_test.o libqefft.a $(FFT_LIBS) $(BLAS_LIBS) $(MPI_LIBS) $(LD_LIBS)
TEST : test # compatibility
clean :
- /bin/rm -f *.o *.a *.d *.i *~ *_tmp.f90 *.mod *.L
include make.depend
distclean: clean

View File

@ -74,6 +74,5 @@ Misc. helper routines:
Tests:
test0.f90
test.f90

View File

@ -0,0 +1,59 @@
###########################################################
# qe_fftx target
###########################################################
set(f_src_fftx
fft_scatter.f90
fft_scatter_2d.f90
scatter_mod.f90
fft_ggen.f90
fft_fwinv.f90
fft_scalar.f90
fftw_interfaces.f90
fft_parallel.f90
fft_parallel_2d.f90
fft_interfaces.f90
fft_interpolate.f90
stick_base.f90
fft_smallbox.f90
fft_smallbox_type.f90
fft_support.f90
fft_error.f90
fft_types.f90
tg_gather.f90
fft_helper_subroutines.f90
fft_param.f90)
if(QE_ENABLE_CUDA)
set(f_src_fftx
${f_src_fftx}
fft_scalar.cuFFT.f90
fft_buffers.f90
fft_scatter_gpu.f90
fft_scatter_2d_gpu.f90)
endif()
qe_enable_cuda_fortran("${f_src_fftx}")
set(c_src_fftx
fft_stick.c
fftw.c
fftw_sp.c
fftw_dp.c)
qe_add_library(qe_fftx
${f_src_fftx}
${c_src_fftx}
${qe_ext_fft_wrappers})
target_link_libraries(qe_fftx
PRIVATE
qe_ext_fft
qe_openmp_fortran
qe_mpi_fortran)
if(QE_ENABLE_CUDA)
target_link_libraries(qe_fftx
PUBLIC
qe_openacc_fortran
PRIVATE
CUDA::cufft)
endif()
qe_install_targets(qe_fftx)

61
FFTXlib/src/Makefile Normal file
View File

@ -0,0 +1,61 @@
# Makefile for FFTXlib
include ../../make.inc
FFTX = \
scatter_mod.o \
fft_scatter_2d.o \
fft_scatter_gpu.o \
fft_scatter_2d_gpu.o \
fft_ggen.o \
fft_fwinv.o \
fft_scalar.o \
fft_scalar.DFTI.o \
fft_scalar.ESSL.o \
fft_scalar.FFTW.o \
fft_scatter.o \
fft_scalar.cuFFT.o \
fftw_interfaces.o \
fft_scalar.FFTW3.o \
fft_scalar.SX6.o \
fft_parallel.o \
fft_parallel_2d.o \
fft_interfaces.o \
fft_interpolate.o \
stick_base.o \
fftw.o \
fftw_sp.o \
fftw_dp.o \
fft_smallbox.o \
fft_smallbox_type.o \
fft_support.o \
fft_error.o \
fft_stick.o \
fft_types.o \
tg_gather.o \
fft_helper_subroutines.o \
fft_param.o \
fft_buffers.o
all : libqefft.a
libqefft.a: $(FFTX)
$(AR) $(ARFLAGS) $@ $?
$(RANLIB) $@
fft_scalar.o : fft_scalar.f90 fft_scalar.FFTW3.f90 fft_scalar.FFTW.f90 fft_scalar.SX6.f90 fft_scalar.DFTI.f90 fft_scalar.ESSL.f90
fft_stick.o : fft_stick.c fftw_sp.h fftw_dp.h
fftw.o : fftw.c fftw.h
fftw_sp.o : fftw_sp.c fftw_sp.h fftw.h konst.h
fftw_dp.o : fftw_dp.c fftw_dp.h fftw.h konst.h
clean :
- /bin/rm -f *.o *.a *.d *.i *~ *_tmp.f90 *.mod *.L
include make.depend

View File

@ -23,7 +23,7 @@ MODULE fft_helper_subroutines
#endif
END INTERFACE
PRIVATE
PUBLIC :: fftx_threed2oned, fftx_threed2oned_gpu, fftx_oned2threed
PUBLIC :: fftx_threed2oned, fftx_oned2threed
PUBLIC :: tg_reduce_rho
PUBLIC :: tg_get_nnr, tg_get_recip_inc, fftx_ntgrp, fftx_tgpe, &
tg_get_group_nr3
@ -468,63 +468,86 @@ CONTAINS
END IF
END SUBROUTINE
SUBROUTINE fftx_threed2oned( desc, vin, vout1, vout2 )
!! Copy charge density from 3D array to 1D array in Fourier
!! space.
USE fft_param
USE fft_types, ONLY : fft_type_descriptor
TYPE(fft_type_descriptor), INTENT(in) :: desc
complex(DP), INTENT(OUT) :: vout1(:)
complex(DP), OPTIONAL, INTENT(OUT) :: vout2(:)
complex(DP), INTENT(IN) :: vin(:)
COMPLEX(DP) :: fp, fm
INTEGER :: ig
IF( PRESENT( vout2 ) ) THEN
DO ig=1,desc%ngm
fp=vin(desc%nl(ig))+vin(desc%nlm(ig))
fm=vin(desc%nl(ig))-vin(desc%nlm(ig))
vout1(ig) = 0.5d0*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
vout2(ig) = 0.5d0*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
END DO
ELSE
DO ig=1,desc%ngm
vout1(ig) = vin(desc%nl(ig))
END DO
END IF
END SUBROUTINE
SUBROUTINE fftx_threed2oned_gpu( desc, vin_d, vout1_d, vout2_d )
SUBROUTINE fftx_threed2oned( desc, vin, vout1, vout2, gpu_args_ )
!! GPU version of \(\texttt{fftx_threed2oned}\).
USE fft_param
USE fft_types, ONLY : fft_type_descriptor
TYPE(fft_type_descriptor), INTENT(in) :: desc
complex(DP), INTENT(OUT) :: vout1_d(:)
complex(DP), OPTIONAL, INTENT(OUT) :: vout2_d(:)
complex(DP), INTENT(IN) :: vin_d(:)
INTEGER, POINTER :: nl_d(:), nlm_d(:)
USE fft_types, ONLY : fft_type_descriptor
!
TYPE(fft_type_descriptor), INTENT(IN) :: desc
COMPLEX(DP), INTENT(OUT) :: vout1(:)
COMPLEX(DP), OPTIONAL, INTENT(OUT) :: vout2(:)
COMPLEX(DP), INTENT(IN) :: vin(:)
LOGICAL, INTENT(IN), OPTIONAL :: gpu_args_
!
COMPLEX(DP) :: fp, fm
INTEGER :: ig
LOGICAL :: gpu_args
!
#if defined(__CUDA) && defined(_OPENACC)
attributes(DEVICE) :: nl_d, nlm_d
!$acc data deviceptr( vout1_d(:), vout2_d(:), vin_d(:) )
INTEGER, POINTER, DEVICE :: nl_d(:), nlm_d(:)
!
nl_d => desc%nl_d
nlm_d => desc%nlm_d
IF( PRESENT( vout2_d ) ) THEN
!$acc parallel loop
DO ig=1,desc%ngm
fp=vin_d(nl_d(ig))+vin_d(nlm_d(ig))
fm=vin_d(nl_d(ig))-vin_d(nlm_d(ig))
vout1_d(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
vout2_d(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
END DO
ELSE
!$acc parallel loop
DO ig=1,desc%ngm
vout1_d(ig) = vin_d(nl_d(ig))
END DO
END IF
!$acc end data
#else
INTEGER, ALLOCATABLE :: nl_d(:), nlm_d(:)
!
ALLOCATE( nl_d(desc%ngm) )
nl_d = desc%nl
IF( PRESENT( vout2 ) ) THEN
ALLOCATE( nlm_d(desc%ngm) )
nlm_d = desc%nlm
ENDIF
!$acc data copyin( nl_d, nlm_d )
#endif
gpu_args = .FALSE.
IF ( PRESENT(gpu_args_) ) gpu_args = gpu_args_
!
IF ( .NOT. gpu_args ) THEN
!
!$acc data copyin( vin ) copyout( vout1, vout2 )
IF( PRESENT( vout2 ) ) THEN
!$acc parallel loop
DO ig = 1, desc%ngm
fp = vin(nl_d(ig))+vin(nlm_d(ig))
fm = vin(nl_d(ig))-vin(nlm_d(ig))
vout1(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
vout2(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
ENDDO
ELSE
!$acc parallel loop
DO ig = 1, desc%ngm
vout1(ig) = vin(nl_d(ig))
ENDDO
ENDIF
!$acc end data
!
ELSE
!
!$acc data present( vout1(:), vout2(:), vin(:) )
IF( PRESENT( vout2 ) ) THEN
!$acc parallel loop
DO ig = 1, desc%ngm
fp = vin(nl_d(ig))+vin(nlm_d(ig))
fm = vin(nl_d(ig))-vin(nlm_d(ig))
vout1(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX( DBLE(fp),AIMAG(fm),kind=DP)
vout2(ig) = CMPLX(0.5d0,0.d0,kind=DP)*CMPLX(AIMAG(fp),-DBLE(fm),kind=DP)
ENDDO
ELSE
!$acc parallel loop
DO ig = 1, desc%ngm
vout1(ig) = vin(nl_d(ig))
ENDDO
ENDIF
!$acc end data
!
ENDIF
!
#if !defined(__CUDA) || !defined(_OPENACC)
!$acc end data
DEALLOCATE( nl_d )
IF( PRESENT( vout2 ) ) DEALLOCATE( nlm_d )
#endif
!
END SUBROUTINE
SUBROUTINE fftx_psi2c_gamma( desc, vin, vout1, vout2 )

View File

@ -20,11 +20,11 @@ MODULE fft_param
INTEGER, PARAMETER :: MPI_COMM_SELF = -2
#endif
INTEGER, PARAMETER :: ndims = 10
INTEGER, PARAMETER :: ndims = 20
!! Number of different FFT tables that the module
!!could keep into memory without reinitialization
INTEGER, PARAMETER :: nfftx = 2049
INTEGER, PARAMETER :: nfftx = 16385
!!Max allowed fft dimension
INTEGER, PARAMETER :: DP = selected_real_kind(14,200)

Some files were not shown because too many files have changed in this diff Show More