diff --git a/EPW/CMakeLists.txt b/EPW/CMakeLists.txt
index 3c5801de1..a4627cf1c 100644
--- a/EPW/CMakeLists.txt
+++ b/EPW/CMakeLists.txt
@@ -51,9 +51,8 @@ set(sources
     src/loadbm.f90
     src/bfield.f90
     src/io_indabs.f90
-    src/ephblochkq.f90
-    src/wfc_elec.f90
-    src/test_tools.f90)
+    src/polaron.f90
+    )
 
 qe_add_library(qe_epw ${sources})
 target_link_libraries(
diff --git a/EPW/src/Makefile b/EPW/src/Makefile
index ac8545d3b..1303fa881 100644
--- a/EPW/src/Makefile
+++ b/EPW/src/Makefile
@@ -71,8 +71,8 @@ epw_summary.o \
 loadumat.o \
 stop_epw.o \
 wannierEPW.o \
-loadbm.o
-EPWOBJS += ephblochkq.o wfc_elec.o test_tools.o
+loadbm.o \
+polaron.o \
 
 #default : epw
 
diff --git a/EPW/src/bcast_epw_input.f90 b/EPW/src/bcast_epw_input.f90
index 086412892..a1518870f 100644
--- a/EPW/src/bcast_epw_input.f90
+++ b/EPW/src/bcast_epw_input.f90
@@ -78,16 +78,21 @@
   USE io_global,     ONLY : meta_ionode_id
   USE control_flags, ONLY : iverbosity
   USE ions_base,     ONLY : amass
-  ! ---------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  USE epwcom,        ONLY : wfcelec, model_vertex , polaron_wf, r01, r02, r03,&
-                            num_cbands, start_band, start_mode, cb_shift,     &
-                            polaron_interpol, polaron_bq, polaron_dos,        &
-                            electron_dos, phonon_dos, diag_mode,              &
-                            restart_polaron_mode, polaron_type,               &
-                            emax_plrn, nDOS_plrn, emin_plrn
-  ! -------------------------------------------------------------------------------
+  ! Added for polaron calculations by Chao Lian
+  USE epwcom,        ONLY : plrn, restart_plrn, conv_thr_plrn, end_band_plrn,  lrot,  &
+                            cal_psir_plrn, start_band_plrn,  type_plrn, nstate_plrn,  &
+                            interp_Ank_plrn, interp_Bqu_plrn, init_sigma_plrn,        &
+                            full_diagon_plrn, mixing_Plrn, init_plrn, niter_plrn,     &
+                            nDOS_plrn, edos_max_plrn, edos_min_plrn, edos_sigma_plrn, &
+                            pdos_sigma_plrn, pdos_max_plrn, pdos_min_plrn,            &
+                            seed_plrn, ethrdg_plrn, time_rev_A_plrn, nhblock_plrn,    &
+                            beta_plrn, Mmn_plrn, recal_Mmn_plrn, r0_plrn, debug_plrn, &
+                            time_rev_U_plrn,  g_start_band_plrn, g_end_band_plrn,     &
+                            g_start_energy_plrn, g_end_energy_plrn,                   &
+                            model_vertex_plrn, model_enband_plrn, model_phfreq_plrn,  &
+                            kappa_plrn, omega_LO_plrn, m_eff_plrn, step_wf_grid_plrn, &
+                            scell_mat_plrn, scell_mat
+  !
   !
   IMPLICIT NONE
   !
@@ -300,29 +305,54 @@
   CALL mp_bcast(scdm_entanglement, meta_ionode_id, world_comm)
   !
   ! ---------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  CALL mp_bcast (wfcelec         , meta_ionode_id, world_comm)
-  CALL mp_bcast (model_vertex    , meta_ionode_id, world_comm)
-  CALL mp_bcast (polaron_wf      , meta_ionode_id, world_comm)
-  CALL mp_bcast (polaron_interpol, meta_ionode_id, world_comm)
-  CALL mp_bcast (polaron_bq      , meta_ionode_id, world_comm)
-  CALL mp_bcast (polaron_dos     , meta_ionode_id, world_comm)
-  CALL mp_bcast (electron_dos    , meta_ionode_id, world_comm)
-  CALL mp_bcast (phonon_dos      , meta_ionode_id, world_comm)
-  CALL mp_bcast (num_cbands  , meta_ionode_id, world_comm)
-  CALL mp_bcast (start_band  , meta_ionode_id, world_comm)
-  CALL mp_bcast (start_mode  , meta_ionode_id, world_comm)
-  CALL mp_bcast (cb_shift    , meta_ionode_id, world_comm)
-  CALL mp_bcast (diag_mode   , meta_ionode_id, world_comm)
-  CALL mp_bcast (restart_polaron_mode, meta_ionode_id, world_comm)
-  CALL mp_bcast (polaron_type, meta_ionode_id, world_comm)
-  CALL mp_bcast (r01           , meta_ionode_id, world_comm)
-  CALL mp_bcast (r02           , meta_ionode_id, world_comm)
-  CALL mp_bcast (r03           , meta_ionode_id, world_comm)
-  CALL mp_bcast (nDOS_plrn     , meta_ionode_id, world_comm)
-  CALL mp_bcast (emax_plrn     , meta_ionode_id, world_comm)
-  CALL mp_bcast (emin_plrn     , meta_ionode_id, world_comm)
+  ! Added for polaron calculations by Chao Lian.
+  CALL mp_bcast(lrot               , meta_ionode_id, world_comm)
+  CALL mp_bcast(plrn ,               meta_ionode_id, world_comm)
+  CALL mp_bcast(cal_psir_plrn,       meta_ionode_id, world_comm)
+  CALL mp_bcast(interp_Ank_plrn,     meta_ionode_id, world_comm)
+  CALL mp_bcast(interp_Bqu_plrn,     meta_ionode_id, world_comm)
+  CALL mp_bcast(start_band_plrn,     meta_ionode_id, world_comm)
+  CALL mp_bcast(end_band_plrn,       meta_ionode_id, world_comm)
+  CALL mp_bcast(type_plrn,           meta_ionode_id, world_comm)
+  CALL mp_bcast(nDOS_plrn,           meta_ionode_id, world_comm)
+  CALL mp_bcast(edos_max_plrn,       meta_ionode_id, world_comm)
+  CALL mp_bcast(edos_min_plrn,       meta_ionode_id, world_comm)
+  CALL mp_bcast(nstate_plrn,         meta_ionode_id, world_comm)
+  CALL mp_bcast(niter_plrn,          meta_ionode_id, world_comm)
+  CALL mp_bcast(conv_thr_plrn,       meta_ionode_id, world_comm)
+  CAll mp_bcast(restart_plrn,        meta_ionode_id, world_comm)
+  CAll mp_bcast(type_plrn,           meta_ionode_id, world_comm)
+  CAll mp_bcast(init_sigma_plrn,     meta_ionode_id, world_comm)
+  CAll mp_bcast(ethrdg_plrn,         meta_ionode_id, world_comm)
+  CAll mp_bcast(time_rev_A_plrn,     meta_ionode_id, world_comm)
+  CAll mp_bcast(time_rev_U_plrn,     meta_ionode_id, world_comm)
+  CAll mp_bcast(debug_plrn,          meta_ionode_id, world_comm)
+  CAll mp_bcast(full_diagon_plrn,    meta_ionode_id, world_comm)
+  CAll mp_bcast(mixing_Plrn,         meta_ionode_id, world_comm)
+  CAll mp_bcast(init_plrn,           meta_ionode_id, world_comm)
+  CAll mp_bcast(Mmn_plrn,            meta_ionode_id, world_comm)
+  CAll mp_bcast(recal_Mmn_plrn,      meta_ionode_id, world_comm)
+  CAll mp_bcast(r0_plrn,             meta_ionode_id, world_comm)
+  CAll mp_bcast(edos_sigma_plrn,     meta_ionode_id, world_comm)
+  CAll mp_bcast(pdos_sigma_plrn,     meta_ionode_id, world_comm)
+  CAll mp_bcast(pdos_max_plrn,       meta_ionode_id, world_comm)
+  CAll mp_bcast(pdos_min_plrn,       meta_ionode_id, world_comm)
+  CAll mp_bcast(seed_plrn,           meta_ionode_id, world_comm)
+  CAll mp_bcast(nhblock_plrn,        meta_ionode_id, world_comm)
+  CAll mp_bcast(beta_plrn,           meta_ionode_id, world_comm)
+  CAll mp_bcast(g_start_band_plrn,   meta_ionode_id, world_comm)
+  CAll mp_bcast(g_end_band_plrn,     meta_ionode_id, world_comm)
+  CAll mp_bcast(g_start_energy_plrn, meta_ionode_id, world_comm)
+  CAll mp_bcast(g_end_energy_plrn,   meta_ionode_id, world_comm)
+  CAll mp_bcast(step_wf_grid_plrn,   meta_ionode_id, world_comm)
+  CAll mp_bcast(model_vertex_plrn,   meta_ionode_id, world_comm)
+  CAll mp_bcast(model_enband_plrn,   meta_ionode_id, world_comm)
+  CAll mp_bcast(model_phfreq_plrn,   meta_ionode_id, world_comm)
+  CAll mp_bcast(kappa_plrn,          meta_ionode_id, world_comm)
+  CAll mp_bcast(omega_LO_plrn,       meta_ionode_id, world_comm)
+  CAll mp_bcast(m_eff_plrn,          meta_ionode_id, world_comm)
+  CALL mp_bcast(scell_mat_plrn,      meta_ionode_id, world_comm)
+  CALL mp_bcast(scell_mat,           meta_ionode_id, world_comm)
   ! --------------------------------------------------------------------------------
 #endif
   !
diff --git a/EPW/src/ephblochkq.f90 b/EPW/src/ephblochkq.f90
deleted file mode 100644
index 5df2192a0..000000000
--- a/EPW/src/ephblochkq.f90
+++ /dev/null
@@ -1,1010 +0,0 @@
-!
-! Copyright (C) 2010-2016 Samuel Ponce', Roxana Margine, Carla Verdi,
-! Feliciano Giustino
-! Copyright (C) 2007-2009 Jesse Noffsinger, Brad Malone, Feliciano
-! Giustino
-!
-! This file is distributed under the terms of the GNU General Public
-! License. See the file `LICENSE' in the root directory of the
-! present distribution, or http://www.gnu.org/copyleft.gpl.txt .
-!
-!-----------------------------------------------------------------------
-MODULE ephblochkq
-    PUBLIC :: gkg, phonon_eigvector, interpol_a_k, interpol_bq, get_cfac, compute_a_re
-CONTAINS
-    SUBROUTINE gkq ( iq, nrr_k, nrr_q, nrr_g, irvec_q, irvec_g, ndegen_k, ndegen_q, ndegen_g, &
-            w2, uf, epmatwef, irvec_r, dims, dims2)
-        !-----------------------------------------------------------------------
-        USE kinds,         ONLY : dp
-        USE pwcom,         ONLY : nbnd, nks, nkstot, isk, &
-            et, xk, ef,  nelec
-        USE cell_base,     ONLY : at, bg, omega, alat
-        USE start_k,       ONLY : nk1, nk2, nk3
-        USE ions_base,     ONLY : nat, amass, ityp, tau
-        USE phcom,         ONLY : nq1, nq2, nq3
-        USE modes,         ONLY : nmodes
-        USE epwcom,        ONLY : nbndsub, fsthick, epwread, longrange, &
-            epwwrite, ngaussw, degaussw, lpolar, lifc, lscreen, &
-            etf_mem, scr_typ, &
-            elecselfen, phonselfen, nest_fn, a2f, specfun_ph, &
-            vme, eig_read, ephwrite, nkf1, nkf2, nkf3, &
-            efermi_read, fermi_energy, specfun_el, band_plot, &
-            nqf1, nqf2, nqf3, mp_mesh_k, restart, prtgkk, &
-            plselfen, specfun_pl, wfcelec
-        USE noncollin_module, ONLY : noncolin
-        USE constants_epw, ONLY : ryd2ev, ryd2mev, one, two, czero, twopi, ci, zero
-        USE io_files,      ONLY : prefix, diropn
-        USE io_global,     ONLY : stdout, ionode
-        USE elph2,         ONLY : cu, cuq, lwin, lwinq,&
-            chw, chw_ks, cvmew, cdmew, rdw, &
-            epmatwp, epmatq, etf, etf_k, etf_ks, xqf, xkf, &
-            wkf, dynq, nqtotf, nkqf, epf17, nkf, nqf, et_ks, &
-            ibndmin, ibndmax, lambda_all, dmec, dmef, vmef, &
-            sigmai_all, sigmai_mode, gamma_all, epsi, zstar, &
-            efnew, ifc, sigmar_all, zi_all, nkqtotf, eps_rpa, &
-            g2_4, wf, nbndskip
-#if defined(__NAG)
-        USE f90_unix_io,   ONLY : FLUSH
-#endif
-        USE mp,            ONLY : mp_barrier, mp_bcast, mp_sum
-        USE io_global,     ONLY : ionode_id
-        USE mp_global,     ONLY : inter_pool_comm, intra_pool_comm, root_pool
-        USE mp_world,      ONLY : mpime
-        USE division,  ONLY : fkbounds
-        USE wan2bloch, ONLY : dynwan2bloch, dynifc2blochf, hamwan2bloch, ephwan2blochp, ephwan2bloch
-        USE rigid_epw,     ONLY : rpa_epsilon, tf_epsilon, compute_umn_f, rgd_blk_epw_fine
-        !
-        IMPLICIT NONE
-        !
-        INTEGER, INTENT (IN) :: iq, nrr_k, nrr_q, nrr_g
-        INTEGER, INTENT (IN) :: irvec_q(:,:), irvec_g(:,:)
-        INTEGER, INTENT (IN) :: ndegen_k(:,:,:), ndegen_q(:,:,:), ndegen_g(:,:,:)
-        REAL(KIND=dp), INTENT (INOUT) :: w2(3*nat)
-        COMPLEX(KIND=dp), INTENT (INOUT) :: uf ( nmodes, nmodes), epmatwef( nbndsub, nbndsub, nrr_k, nmodes)
-        REAL(KIND=dp), INTENT (IN) :: irvec_r(3,nrr_k)
-        !
-        ! Local  variables
-        !! FIXME: dims should be nbnd_sub and intent(in)
-        INTEGER, INTENT (IN) :: dims
-        !! Dims is either nbndsub if use_ws or 1 if not
-        INTEGER, INTENT (IN) :: dims2
-        !! Dims is either nat if use_ws or 1 if not
-        COMPLEX(KIND=dp) :: cfac(nrr_k, dims, dims)
-        !! Used to store $e^{2\pi r \cdot k}$ exponential
-        COMPLEX(KIND=dp) :: cfacq(nrr_k, dims, dims)
-        !! Used to store $e^{2\pi r \cdot k+q}$ exponential
-        COMPLEX(KIND=dp) :: cufkk ( nbndsub, nbndsub )
-        !! Rotation matrix, fine mesh, points k
-        COMPLEX(KIND=dp) :: cufkq (  nbndsub, nbndsub )
-        !! the same, for points k+q
-        COMPLEX(KIND=dp) :: epmatf( nbndsub, nbndsub, nmodes)
-        !! e-p matrix  in smooth Bloch basis, fine mesh
-        COMPLEX(KIND=dp) :: bmatf ( nbndsub, nbndsub)
-        !! overlap U_k+q U_k^\dagger in smooth Bloch basis, fine mesh
-        INTEGER :: nksqtotf
-        !! half of the total number of k+q points (fine grid)
-        INTEGER :: lower_bnd
-        !! lower bound for the k-depend index among the mpi pools
-        INTEGER :: upper_bnd
-        !! lower bound for the k-depend index among the mpi pools
-        INTEGER :: ik
-        !! Counter on coarse k-point grid
-        INTEGER :: ikk
-        !! Counter on k-point when you have paired k and q
-        INTEGER :: ikq
-        !! Paired counter so that q is adjacent to its k
-        INTEGER :: ibnd
-        !! Counter on band
-        INTEGER :: jbnd
-        !! Counter on band
-        INTEGER :: na
-        !! Counter on atom
-        INTEGER :: mu
-        !! counter on mode
-        INTEGER :: nu
-        !! counter on mode
-        INTEGER :: fermicount
-        !! Number of states at the Fermi level
-        INTEGER :: nrws
-        !! Number of real-space Wigner-Seitz
-        INTEGER, PARAMETER :: nrwsx=200
-        !! Maximum number of real-space Wigner-Seitz
-        REAL(KIND=dp) :: xxq(3)
-        !! Current q-point
-        REAL(KIND=dp) :: xxk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkq(3)
-        !! Current k+q point on the fine grid
-        REAL(KIND=dp) :: rws(0:3,nrwsx)
-        !! Real-space wigner-Seitz vectors
-        REAL(KIND=dp), PARAMETER :: eps = 0.01/ryd2mev
-        !! Tolerence
-
-    END SUBROUTINE gkq
-
-    SUBROUTINE phonon_eigvector ( iq, nrr_k, nrr_q, irvec_q, ndegen_q, w2, uf, epmatwef)
-        !-----------------------------------------------------------------------
-        USE kinds,         ONLY : dp
-        USE pwcom,         ONLY : nbnd, nks, nkstot, isk, &
-            et, xk, ef,  nelec
-        USE cell_base,     ONLY : at, bg, omega, alat
-        USE start_k,       ONLY : nk1, nk2, nk3
-        USE ions_base,     ONLY : nat, amass, ityp, tau
-        USE phcom,         ONLY : nq1, nq2, nq3
-        USE modes,         ONLY : nmodes
-        USE epwcom,        ONLY : nbndsub, fsthick, epwread, longrange, &
-            epwwrite, ngaussw, degaussw, lpolar, lifc, lscreen,&
-            etf_mem, scr_typ,&
-            elecselfen, phonselfen, nest_fn, a2f, specfun_ph, &
-            vme, eig_read, ephwrite, nkf1, nkf2, nkf3, &
-            efermi_read, fermi_energy, specfun_el, band_plot, &
-            nqf1, nqf2, nqf3, mp_mesh_k, restart, prtgkk, &
-            plselfen, specfun_pl, wfcelec
-        USE noncollin_module, ONLY : noncolin
-        USE constants_epw, ONLY : ryd2ev, ryd2mev, one, two, czero, twopi, ci, zero
-        USE io_files,      ONLY : prefix, diropn
-        USE io_global,     ONLY : stdout, ionode
-        USE elph2,         ONLY : cu, cuq, lwin, lwinq,&
-            chw, chw_ks, cvmew, cdmew, rdw, &
-            epmatwp, epmatq, etf, etf_k, etf_ks, xqf, xkf, &
-            wkf, dynq, nqtotf, nkqf, epf17, nkf, nqf, et_ks, &
-            ibndmin, ibndmax, lambda_all, dmec, dmef, vmef, &
-            sigmai_all, sigmai_mode, gamma_all, epsi, zstar, &
-            efnew, ifc, sigmar_all, zi_all, nkqtotf, eps_rpa, &
-            g2_4, wf, nbndskip
-#if defined(__NAG)
-        USE f90_unix_io,   ONLY : FLUSH
-#endif
-        USE mp,            ONLY : mp_barrier, mp_bcast, mp_sum
-        USE io_global,     ONLY : ionode_id
-        USE mp_global,     ONLY : inter_pool_comm, intra_pool_comm, root_pool
-        USE mp_world,      ONLY : mpime
-        USE division,  ONLY : fkbounds
-        USE wan2bloch, ONLY : dynwan2bloch, dynifc2blochf, hamwan2bloch, ephwan2blochp
-        !
-        IMPLICIT NONE
-        !
-        !INTEGER :: nrws
-        !REAL(kind=DP) :: xxq(3), w2(3*nat), rws(0:3,200)
-        !COMPLEX(kind=DP) :: uf ( nmodes, nmodes)
-        !
-        INTEGER, INTENT (IN) :: iq !, nrr_q!, ndegen_q(20*nq1*nq2*nq3)
-        INTEGER, INTENT (IN) :: nrr_k, nrr_q, irvec_q(:,:), ndegen_q(:,:,:) ! ! Added for polaron calculations by Chao Lian.
-
-        INTEGER :: nksqtotf, lower_bnd, upper_bnd
-        REAL(KIND=dp), INTENT (INOUT) :: w2(3*nat)
-        COMPLEX(KIND=dp), INTENT (INOUT) :: uf ( nmodes, nmodes), epmatwef( nbndsub,nbndsub, nrr_k, nmodes)
-        COMPLEX(KIND=dp) :: cfac(nrr_k), cfacq(nrr_k), cufkk ( nbndsub, nbndsub),cufkq ( nbndsub, nbndsub), &
-            epmatf( nbndsub, nbndsub, nmodes), bmatf( nbndsub, nbndsub)
-        !
-        ! Local  variables
-        LOGICAL :: already_skipped
-        !! Skipping band during the Wannierization
-        LOGICAL :: exst
-        !! If the file exist
-        LOGICAL :: opnd
-        !! Check whether the file is open.
-        !
-        CHARACTER (LEN=256) :: filint
-        !! Name of the file to write/read
-        CHARACTER (LEN=256) :: namef
-        !! Name of the file
-        CHARACTER (LEN=30)  :: myfmt
-        !! Variable used for formatting output
-        !
-        INTEGER :: ios
-        !! integer variable for I/O control
-        INTEGER :: ik
-        !! Counter on coarse k-point grid
-        INTEGER :: ikk
-        !! Counter on k-point when you have paired k and q
-        INTEGER :: ikq
-        !! Paired counter so that q is adjacent to its k
-        INTEGER :: ibnd
-        !! Counter on band
-        INTEGER :: jbnd
-        !! Counter on band
-        INTEGER :: imode
-        !! Counter on mode
-        INTEGER :: na
-        !! Counter on atom
-        INTEGER :: mu
-        !! counter on mode
-        INTEGER :: nu
-        !! counter on mode
-        INTEGER :: fermicount
-        !! Number of states at the Fermi level
-        INTEGER :: nrec
-        !! record index when reading file
-        INTEGER :: lrepmatw
-        !! record length while reading file
-        INTEGER :: i,j
-        !! Index when writing to file
-        INTEGER :: ikx
-        !! Counter on the coase k-grid
-        INTEGER :: ikfx
-        !! Counter on the fine k-grid.
-        INTEGER :: xkk1, xkq1
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk2, xkq2
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk3, xkq3
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: ir
-        !! Counter for WS loop
-        INTEGER :: nrws
-        !! Number of real-space Wigner-Seitz
-        INTEGER :: valuerss(2)
-        !! Return virtual and resisdent memory from system
-        INTEGER, PARAMETER :: nrwsx=200
-        !! Maximum number of real-space Wigner-Seitz
-        !
-        REAL(KIND=dp) :: rdotk_scal
-        !! Real (instead of array) for $r\cdot k$
-        REAL(KIND=dp) :: xxq(3)
-        !! Current q-point
-        REAL(KIND=dp) :: xxk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkq(3)
-        !! Current k+q point on the fine grid
-        REAL(KIND=dp) :: rws(0:3,nrwsx)
-        !! Real-space wigner-Seitz vectors
-        REAL(KIND=dp) :: atws(3,3)
-        !! Maximum vector: at*nq
-        REAL(KIND=dp), EXTERNAL :: efermig
-        !! External function to calculate the fermi energy
-        REAL(KIND=dp), EXTERNAL :: efermig_seq
-        !! Same but in sequential
-        REAL(KIND=dp), PARAMETER :: eps = 0.01/ryd2mev
-        !! Tolerence
-        !
-        COMPLEX(KIND=dp) :: tableqx (4*nk1+1,2*nkf1+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqy (4*nk2+1,2*nkf2+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqz (4*nk3+1,2*nkf3+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe  (:,:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe_mem  (:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons (written on disk)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfac1(:)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfacq1(:)
-        !
-        !
-    END SUBROUTINE phonon_eigvector
-
-    SUBROUTINE bubble_sort(array,sizes,output,repeat_list)
-        USE kinds,         ONLY : dp
-        !
-        IMPLICIT NONE
-        !
-        INTEGER, INTENT(IN) :: sizes
-        REAL(KIND=dp), INTENT (IN) :: array(sizes)
-        REAL(KIND=dp) :: temp, input_array(sizes)
-        INTEGER :: bubble, j, lsup, degen_label
-        LOGICAL, INTENT(OUT) :: output
-        INTEGER, INTENT(OUT) :: repeat_list(sizes)
-
-    END SUBROUTINE bubble_sort
-
-    SUBROUTINE compute_a_re ( iq, nrr_k, ndegen_k, irvec_r, dims)
-        !-----------------------------------------------------------------------
-        USE kinds,         ONLY : dp
-        USE pwcom,         ONLY : nbnd, nks, nkstot, isk, &
-            et, xk, ef,  nelec
-        USE cell_base,     ONLY : at, bg, omega, alat
-        USE start_k,       ONLY : nk1, nk2, nk3
-        USE ions_base,     ONLY : nat, amass, ityp, atm, ntyp => nsp, tau
-        USE phcom,         ONLY : nq1, nq2, nq3
-        USE modes,         ONLY : nmodes
-        USE epwcom,        ONLY : nbndsub, fsthick, epwread, longrange, &
-            epwwrite, ngaussw, degaussw, lpolar, lifc, lscreen, &
-            etf_mem, scr_typ, &
-            elecselfen, phonselfen, nest_fn, a2f, specfun_ph, &
-            vme, eig_read, ephwrite, nkf1, nkf2, nkf3, &
-            efermi_read, fermi_energy, specfun_el, band_plot, &
-            nqf1, nqf2, nqf3, mp_mesh_k, restart, prtgkk, &
-            plselfen, specfun_pl, wfcelec, num_cbands
-        USE noncollin_module, ONLY : noncolin
-        USE constants_epw, ONLY : ryd2ev, ryd2mev, one, two, czero, twopi, ci, zero
-        USE io_files,      ONLY : prefix, diropn
-        USE io_global,     ONLY : stdout, ionode, meta_ionode_id
-        USE elph2,         ONLY : cu, cuq, lwin, lwinq,&
-            chw, chw_ks, cvmew, cdmew, rdw, &
-            epmatwp, epmatq, etf, etf_k, etf_ks, xqf, xkf, &
-            wkf, dynq, nqtotf, nkqf, epf17, nkf, nqf, et_ks, &
-            ibndmin, ibndmax, lambda_all, dmec, dmef, vmef, &
-            sigmai_all, sigmai_mode, gamma_all, epsi, zstar, &
-            efnew, ifc, sigmar_all, zi_all, nkqtotf, eps_rpa, &
-            g2_4, wf, nbndskip
-#if defined(__NAG)
-        USE f90_unix_io,   ONLY : FLUSH
-#endif
-        USE mp,            ONLY : mp_barrier, mp_bcast, mp_sum
-        USE io_global,     ONLY : ionode_id
-        USE mp_global,     ONLY : inter_pool_comm, intra_pool_comm, root_pool, &
-            world_comm
-        USE mp_world,      ONLY : mpime
-        USE division,  ONLY : fkbounds
-        USE wan2bloch, ONLY : dynwan2bloch, dynifc2blochf, hamwan2bloch, ephwan2blochp
-        !
-        IMPLICIT NONE
-        !
-        !INTEGER :: nrws
-        !REAL(kind=DP) :: xxq(3), w2(3*nat), rws(0:3,200)
-        !COMPLEX(kind=DP) :: uf ( nmodes, nmodes)
-        !
-        INTEGER, INTENT (IN) :: iq, nrr_k
-        INTEGER, INTENT (IN) :: ndegen_k(:,:,:) ! ! Added for polaron calculations by Chao Lian.
-        INTEGER, INTENT (IN) :: dims
-        INTEGER :: nksqtotf, lower_bnd, upper_bnd
-        REAL(KIND=dp), INTENT (IN) :: irvec_r(3,nrr_k)
-        !
-        ! Local  variables
-        COMPLEX(KIND=dp) :: cfac(nrr_k), cfacq(nrr_k), cufkk ( nbndsub, nbndsub),cufkq ( nbndsub, nbndsub), &
-            epmatf( nbndsub, nbndsub, nmodes), bmatf( nbndsub, nbndsub)
-        LOGICAL :: already_skipped
-        !! Skipping band during the Wannierization
-        LOGICAL :: exst
-        !! If the file exist
-        LOGICAL :: opnd
-        !! Check whether the file is open.
-        !
-        CHARACTER (LEN=256) :: filint
-        !! Name of the file to write/read
-        CHARACTER (LEN=256) :: namef
-        !! Name of the file
-        CHARACTER (LEN=30)  :: myfmt
-        !! Variable used for formatting output
-        !
-        INTEGER :: ios
-        !! integer variable for I/O control
-        INTEGER :: ik
-        !! Counter on coarse k-point grid
-        INTEGER :: ikk
-        !! Counter on k-point when you have paired k and q
-        INTEGER :: ikq
-        !! Paired counter so that q is adjacent to its k
-        INTEGER :: ibnd
-        !! Counter on band
-        INTEGER :: jbnd
-        !! Counter on band
-        INTEGER :: imode
-        !! Counter on mode
-        INTEGER :: na
-        !! Counter on atom
-        INTEGER :: mu
-        !! counter on mode
-        INTEGER :: nu
-        !! counter on mode
-        INTEGER :: fermicount
-        !! Number of states at the Fermi level
-        INTEGER :: nrec
-        !! record index when reading file
-        INTEGER :: lrepmatw
-        !! record length while reading file
-        INTEGER :: i,j
-        !! Index when writing to file
-        INTEGER :: ikx
-        !! Counter on the coase k-grid
-        INTEGER :: ikfx
-        !! Counter on the fine k-grid.
-        INTEGER :: xkk1, xkq1
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk2, xkq2
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk3, xkq3
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: ir
-        !! Counter for WS loop
-        INTEGER :: nrws
-        !! Number of real-space Wigner-Seitz
-        INTEGER :: valuerss(2)
-        !! Return virtual and resisdent memory from system
-        INTEGER, PARAMETER :: nrwsx=200
-        !! Maximum number of real-space Wigner-Seitz
-        !
-        REAL(KIND=dp) :: rdotk_scal
-        !! Real (instead of array) for $r\cdot k$
-        REAL(KIND=dp) :: xxq(3)
-        !! Current q-point
-        REAL(KIND=dp) :: xxk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkq(3)
-        !! Current k+q point on the fine grid
-        REAL(KIND=dp) :: rws(0:3,nrwsx)
-        !! Real-space wigner-Seitz vectors
-        REAL(KIND=dp) :: atws(3,3)
-        !! Maximum vector: at*nq
-        REAL(KIND=dp), EXTERNAL :: efermig
-        !! External function to calculate the fermi energy
-        REAL(KIND=dp), EXTERNAL :: efermig_seq
-        !! Same but in sequential
-        REAL(KIND=dp), PARAMETER :: eps = 0.01/ryd2mev
-        !! Tolerence
-        !
-        COMPLEX(KIND=dp) :: tableqx (4*nk1+1,2*nkf1+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqy (4*nk2+1,2*nkf2+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqz (4*nk3+1,2*nkf3+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe  (:,:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe_mem  (:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons (written on disk)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfac1(:)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfacq1(:)
-        INTEGER :: ounit, nx,ny,nz, np, hh, sort_indice_cutoff, n1_dim, k
-        INTEGER :: ne, irr, ncb, ib, ii, jj, kk
-        INTEGER :: n1_dim_x,n1_dim_y,n1_dim_z,hhx,hhy,hhz,npx,npy,npz
-        INTEGER :: grid_nature_s(3), grid_infor(9)
-        COMPLEX(KIND=dp) :: z1, z2, au
-        COMPLEX(KIND=dp), ALLOCATABLE :: ac(:), carica(:,:,:), am(:,:)
-        REAL(KIND=dp) :: wannier_func, iso_gaussian, xcart(3), rcart(3), rcoor(3)
-        REAL(KIND=dp) :: e1(3), e2(3), e3(3), x0(3), m1, m2, m3, xkkf(3)
-        REAL(KIND=dp) :: deltax, deltay, deltaz, rdk
-        CHARACTER*12 :: aclist
-        CHARACTER(*), PARAMETER :: fileplace = "./grid/"
-        !
-    END SUBROUTINE compute_a_re
-
-    SUBROUTINE interpol_a_k ( iq, nrr_k, ndegen_k, irvec_r, dims)
-        !-----------------------------------------------------------------------
-        USE kinds,         ONLY : dp
-        USE pwcom,         ONLY : nbnd, nks, nkstot, isk, &
-            et, xk, ef,  nelec
-        USE cell_base,     ONLY : at, bg, omega, alat
-        USE start_k,       ONLY : nk1, nk2, nk3
-        USE ions_base,     ONLY : nat, amass, ityp, atm, ntyp => nsp, tau
-        USE phcom,         ONLY : nq1, nq2, nq3
-        USE modes,         ONLY : nmodes
-        USE epwcom,        ONLY : nbndsub, fsthick, epwread, longrange, &
-            epwwrite, ngaussw, degaussw, lpolar, lifc, lscreen, &
-            etf_mem, scr_typ, &
-            elecselfen, phonselfen, nest_fn, a2f, specfun_ph, &
-            vme, eig_read, ephwrite, nkf1, nkf2, nkf3, &
-            efermi_read, fermi_energy, specfun_el, band_plot, &
-            nqf1, nqf2, nqf3, mp_mesh_k, restart, prtgkk, &
-            plselfen, specfun_pl, wfcelec, num_cbands
-        USE noncollin_module, ONLY : noncolin
-        USE constants_epw, ONLY : ryd2ev, ryd2mev, one, two, czero, twopi, ci, zero
-        USE io_files,      ONLY : prefix, diropn
-        USE io_global,     ONLY : stdout, ionode, meta_ionode_id
-        USE elph2,         ONLY : cu, cuq, lwin, lwinq,&
-            chw, chw_ks, cvmew, cdmew, rdw, &
-            epmatwp, epmatq, etf, etf_k, etf_ks, xqf, xkf, &
-            wkf, dynq, nqtotf, nkqf, epf17, nkf, nqf, et_ks, &
-            ibndmin, ibndmax, lambda_all, dmec, dmef, vmef, &
-            sigmai_all, sigmai_mode, gamma_all, epsi, zstar, &
-            efnew, ifc, sigmar_all, zi_all, nkqtotf, eps_rpa, &
-            g2_4, wf, nbndskip
-#if defined(__NAG)
-        USE f90_unix_io,   ONLY : FLUSH
-#endif
-        USE mp,            ONLY : mp_barrier, mp_bcast, mp_sum
-        USE io_global,     ONLY : ionode_id
-        USE mp_global,     ONLY : inter_pool_comm, intra_pool_comm, root_pool, &
-            world_comm
-        USE mp_world,      ONLY : mpime
-        USE division,  ONLY : fkbounds
-        USE wan2bloch, ONLY : dynwan2bloch, dynifc2blochf, hamwan2bloch, ephwan2blochp
-        !
-        IMPLICIT NONE
-        !
-        !INTEGER :: nrws
-        !REAL(kind=DP) :: xxq(3), w2(3*nat), rws(0:3,200)
-        !COMPLEX(kind=DP) :: uf ( nmodes, nmodes)
-        !
-        INTEGER, INTENT (IN) :: iq !,!, ndegen_q(20*nq1*nq2*nq3)
-        INTEGER, INTENT (IN) :: dims
-        INTEGER, INTENT (IN) :: nrr_k, ndegen_k(:,:,:)! ! Added for polaron calculations by Chao Lian.
-        INTEGER :: nksqtotf, lower_bnd, upper_bnd
-        REAL(KIND=dp), INTENT (IN) :: irvec_r(3,nrr_k)
-        !
-        ! Local  variables
-        COMPLEX(KIND=dp) :: cfac(nrr_k), cfacq(nrr_k), cufkk ( nbndsub, nbndsub),cufkq( nbndsub, nbndsub), &
-            epmatf( nbndsub, nbndsub, nmodes), bmatf( nbndsub,nbndsub)
-        LOGICAL :: already_skipped
-        !! Skipping band during the Wannierization
-        LOGICAL :: exst
-        !! If the file exist
-        LOGICAL :: opnd
-        !! Check whether the file is open.
-        !
-        CHARACTER (LEN=256) :: filint
-        !! Name of the file to write/read
-        CHARACTER (LEN=256) :: namef
-        !! Name of the file
-        CHARACTER (LEN=30)  :: myfmt
-        !! Variable used for formatting output
-        !
-        INTEGER :: ios
-        !! integer variable for I/O control
-        INTEGER :: ik
-        !! Counter on coarse k-point grid
-        INTEGER :: ikk
-        !! Counter on k-point when you have paired k and q
-        INTEGER :: ikq
-        !! Paired counter so that q is adjacent to its k
-        INTEGER :: ibnd
-        !! Counter on band
-        INTEGER :: jbnd
-        !! Counter on band
-        INTEGER :: imode
-        !! Counter on mode
-        INTEGER :: na
-        !! Counter on atom
-        INTEGER :: mu
-        !! counter on mode
-        INTEGER :: nu
-        !! counter on mode
-        INTEGER :: fermicount
-        !! Number of states at the Fermi level
-        INTEGER :: nrec
-        !! record index when reading file
-        INTEGER :: lrepmatw
-        !! record length while reading file
-        INTEGER :: i,j
-        !! Index when writing to file
-        INTEGER :: ikx
-        !! Counter on the coase k-grid
-        INTEGER :: ikfx
-        !! Counter on the fine k-grid.
-        INTEGER :: xkk1, xkq1
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk2, xkq2
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk3, xkq3
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: ir
-        !! Counter for WS loop
-        INTEGER :: nrws
-        !! Number of real-space Wigner-Seitz
-        INTEGER :: valuerss(2)
-        !! Return virtual and resisdent memory from system
-        INTEGER, PARAMETER :: nrwsx=200
-        !! Maximum number of real-space Wigner-Seitz
-        !
-        REAL(KIND=dp) :: rdotk_scal
-        !! Real (instead of array) for $r\cdot k$
-        REAL(KIND=dp) :: xxq(3)
-        !! Current q-point
-        REAL(KIND=dp) :: xxk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkq(3)
-        !! Current k+q point on the fine grid
-        REAL(KIND=dp) :: rws(0:3,nrwsx)
-        !! Real-space wigner-Seitz vectors
-        REAL(KIND=dp) :: atws(3,3)
-        !! Maximum vector: at*nq
-        REAL(KIND=dp), EXTERNAL :: efermig
-        !! External function to calculate the fermi energy
-        REAL(KIND=dp), EXTERNAL :: efermig_seq
-        !! Same but in sequential
-        REAL(KIND=dp), PARAMETER :: eps = 0.01/ryd2mev
-        !! Tolerence
-        !
-        COMPLEX(KIND=dp) :: tableqx (4*nk1+1,2*nkf1+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqy (4*nk2+1,2*nkf2+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqz (4*nk3+1,2*nkf3+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe  (:,:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe_mem  (:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons (written on disk)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfac1(:)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfacq1(:)
-        INTEGER :: ounit, nx,ny,nz, hh, sort_indice_cutoff, n1_dim, k
-        INTEGER :: ne, irr, ncb, ib, ii, jj, kk, nkf_global
-        INTEGER :: n1_dim_x,n1_dim_y,n1_dim_z,hhx,hhy,hhz,npx,npy,npz
-        INTEGER :: grid_nature_s(3), grid_infor(9)
-        COMPLEX(KIND=dp) :: z1, z2, au
-        COMPLEX(KIND=dp), ALLOCATABLE :: ac(:,:), carica(:,:,:), am(:), ac_full(:,:)
-        REAL(KIND=dp) :: wannier_func, iso_gaussian, xcart(3), rcart(3), rcoor(3)
-        REAL(KIND=dp) :: e1(3), e2(3), e3(3), x0(3), m1, m2, m3, xkkf(3)
-        REAL(KIND=dp) :: deltax, deltay, deltaz, rdk
-        CHARACTER*12 :: aclist
-        CHARACTER(*), PARAMETER :: fileplace = "./grid/"
-        !
-    END SUBROUTINE interpol_a_k
-
-    SUBROUTINE interpol_bq ( iq, nrr_k, nrr_q, nrr_g, irvec_q, irvec_g, ndegen_k, ndegen_q, ndegen_g, &
-            w2, uf, epmatwef, irvec_r, dims, dims2)
-        !-----------------------------------------------------------------------
-        USE kinds,         ONLY : dp
-        USE pwcom,         ONLY : nbnd, nks, nkstot, isk, &
-            et, xk, ef,  nelec
-        USE cell_base,     ONLY : at, bg, omega, alat
-        USE start_k,       ONLY : nk1, nk2, nk3
-        USE ions_base,     ONLY : nat, amass, ityp, atm, ntyp => nsp, tau
-        USE phcom,         ONLY : nq1, nq2, nq3
-        USE modes,         ONLY : nmodes
-        USE epwcom,        ONLY : nbndsub, fsthick, epwread, longrange, &
-            epwwrite, ngaussw, degaussw, lpolar, lifc, lscreen, &
-            etf_mem, scr_typ, &
-            elecselfen, phonselfen, nest_fn, a2f, specfun_ph, &
-            vme, eig_read, ephwrite, nkf1, nkf2, nkf3, &
-            efermi_read, fermi_energy, specfun_el, band_plot, &
-            nqf1, nqf2, nqf3, mp_mesh_k, restart, prtgkk, &
-            plselfen, specfun_pl, wfcelec, num_cbands
-        USE noncollin_module, ONLY : noncolin
-        USE constants_epw, ONLY : ryd2ev, ryd2mev, one, two, czero, twopi, ci, zero
-        USE io_files,      ONLY : prefix, diropn
-        USE io_global,     ONLY : stdout, ionode, meta_ionode_id
-        USE elph2,         ONLY : cu, cuq, lwin, lwinq,&
-            chw, chw_ks, cvmew, cdmew, rdw, &
-            epmatwp, epmatq, etf, etf_k, etf_ks, xqf, xkf, &
-            wkf, dynq, nqtotf, nkqf, epf17, nkf, nqf, et_ks, &
-            ibndmin, ibndmax, lambda_all, dmec, dmef, vmef, &
-            sigmai_all, sigmai_mode, gamma_all, epsi, zstar, &
-            efnew, ifc, sigmar_all, zi_all, nkqtotf, eps_rpa, &
-            g2_4, wf, nbndskip
-#if defined(__NAG)
-        USE f90_unix_io,   ONLY : FLUSH
-#endif
-        USE mp,            ONLY : mp_barrier, mp_bcast, mp_sum
-        USE io_global,     ONLY : ionode_id
-        USE mp_global,     ONLY : inter_pool_comm, intra_pool_comm, root_pool, &
-            world_comm
-        USE mp_world,      ONLY : mpime
-        USE division,  ONLY : fkbounds
-        USE wan2bloch, ONLY : dynwan2bloch, dynifc2blochf, hamwan2bloch, ephwan2blochp
-        !
-        IMPLICIT NONE
-        !
-        INTEGER, INTENT (IN) :: iq, nrr_k, nrr_q, nrr_g
-        INTEGER, INTENT (IN) :: irvec_q(:,:), irvec_g(:,:)
-        INTEGER, INTENT (IN) :: ndegen_k(:,:,:), ndegen_q(:,:,:), ndegen_g(:,:,:)
-        REAL(KIND=dp), INTENT (INOUT) :: w2(3*nat)
-        COMPLEX(KIND=dp), INTENT (INOUT) :: uf ( nmodes, nmodes), epmatwef( nbndsub, nbndsub, nrr_k, nmodes)
-        REAL(KIND=dp), INTENT (IN) :: irvec_r(3,nrr_k)
-        !
-        ! Local  variables
-        !! FIXME: dims should be nbnd_sub and intent(in)
-        INTEGER, INTENT (IN) :: dims
-        !! Dims is either nbndsub if use_ws or 1 if not
-        INTEGER, INTENT (IN) :: dims2
-        !! Dims is either nat if use_ws or 1 if not
-
-        INTEGER :: nksqtotf, lower_bnd, upper_bnd
-        INTEGER :: n1_dim_x,n1_dim_y,n1_dim_z,hhx,hhy,hhz,npx,npy,npz
-        INTEGER :: grid_nature_s(3), grid_infor(9)
-
-        COMPLEX(KIND=dp) :: cfac(nrr_k), cfacq(nrr_k), cufkk ( nbndsub,nbndsub),cufkq( nbndsub, nbndsub), &
-            epmatf( nbndsub, nbndsub, nmodes), bmatf( nbndsub,nbndsub)
-        CHARACTER*12 :: aclist
-        CHARACTER(*), PARAMETER :: fileplace = "./grid/"
-        !
-        ! Local  variables
-        LOGICAL :: already_skipped
-        !! Skipping band during the Wannierization
-        LOGICAL :: exst
-        !! If the file exist
-        LOGICAL :: opnd
-        !! Check whether the file is open.
-        !
-        CHARACTER (LEN=256) :: filint
-        !! Name of the file to write/read
-        CHARACTER (LEN=256) :: namef
-        !! Name of the file
-        CHARACTER (LEN=30)  :: myfmt
-        !! Variable used for formatting output
-        !
-        INTEGER :: ios
-        !! integer variable for I/O control
-        INTEGER :: ik
-        !! Counter on coarse k-point grid
-        INTEGER :: ikk
-        !! Counter on k-point when you have paired k and q
-        INTEGER :: ikq
-        !! Paired counter so that q is adjacent to its k
-        INTEGER :: ibnd
-        !! Counter on band
-        INTEGER :: jbnd
-        !! Counter on band
-        INTEGER :: imode
-        !! Counter on mode
-        INTEGER :: na
-        !! Counter on atom
-        INTEGER :: mu
-        !! counter on mode
-        INTEGER :: nu
-        !! counter on mode
-        INTEGER :: fermicount
-        !! Number of states at the Fermi level
-        INTEGER :: nrec
-        !! record index when reading file
-        INTEGER :: lrepmatw
-        !! record length while reading file
-        INTEGER :: i,j
-        !! Index when writing to file
-        INTEGER :: ikx
-        !! Counter on the coase k-grid
-        INTEGER :: ikfx
-        !! Counter on the fine k-grid.
-        INTEGER :: xkk1, xkq1
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk2, xkq2
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: xkk3, xkq3
-        !! Integer of xkk when multiplied by nkf/nk
-        INTEGER :: ir
-        !! Counter for WS loop
-        INTEGER :: nrws
-        !! Number of real-space Wigner-Seitz
-        INTEGER :: valuerss(2)
-        !! Return virtual and resisdent memory from system
-        INTEGER, PARAMETER :: nrwsx=200
-        !! Maximum number of real-space Wigner-Seitz
-        !
-        REAL(KIND=dp) :: rdotk_scal
-        !! Real (instead of array) for $r\cdot k$
-        REAL(KIND=dp) :: xxq(3)
-        !! Current q-point
-        REAL(KIND=dp) :: xxk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkk(3)
-        !! Current k-point on the fine grid
-        REAL(KIND=dp) :: xkq(3)
-        !! Current k+q point on the fine grid
-        REAL(KIND=dp) :: rws(0:3,nrwsx)
-        !! Real-space wigner-Seitz vectors
-        REAL(KIND=dp) :: atws(3,3)
-        !! Maximum vector: at*nq
-        REAL(KIND=dp), EXTERNAL :: efermig
-        !! External function to calculate the fermi energy
-        REAL(KIND=dp), EXTERNAL :: efermig_seq
-        !! Same but in sequential
-        REAL(KIND=dp), PARAMETER :: eps = 0.01/ryd2mev
-        !! Tolerence
-        !
-        COMPLEX(KIND=dp) :: tableqx (4*nk1+1,2*nkf1+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqy (4*nk2+1,2*nkf2+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp) :: tableqz (4*nk3+1,2*nkf3+1)
-        !! Look-up table for the exponential (speed optimization) in the case of
-        !! homogeneous grids.
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe  (:,:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons
-        COMPLEX(KIND=dp), ALLOCATABLE :: epmatwe_mem  (:,:,:,:)
-        !! e-p matrix  in wannier basis - electrons (written on disk)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfac1(:)
-        !COMPLEX(kind=DP), ALLOCATABLE :: cfacq1(:)
-        INTEGER :: ounit, nx,ny,nz,  hh, sort_indice_cutoff, n1_dim, k
-        INTEGER :: ne, irr, ncb, ib, ii, jj, kk
-        COMPLEX(KIND=dp) :: z1, z2, au
-        COMPLEX(KIND=dp), ALLOCATABLE :: ac(:,:), carica(:,:,:), am(:), ac_full(:,:), &
-            ac_read(:), bq(:)
-        REAL(KIND=dp) :: wannier_func, iso_gaussian, xcart(3), rcart(3), rcoor(3)
-        REAL(KIND=dp) :: e1(3), e2(3), e3(3), x0(3), m1, m2, m3, xkkf(3)
-        REAL(KIND=dp) :: deltax, deltay, deltaz, rdk
-        !
-    END SUBROUTINE interpol_bq
-
-    SUBROUTINE get_cfac(xk, nrr_k, ndegen_k, irvec_r, dims, cfac)
-        USE epwcom, ONLY : use_ws
-        USE constants_epw, ONLY : twopi, ci, czero
-        USE kinds,         ONLY : dp, i4b
-
-        IMPLICIT NONE
-
-
-        INTEGER, INTENT(IN):: nrr_k, dims
-        INTEGER, INTENT(IN):: ndegen_k(nrr_k, dims, dims)
-        REAL(KIND=dp), INTENT (IN) :: xk(3), irvec_r(3, nrr_k)
-        COMPLEX(KIND=dp), INTENT(OUT) :: cfac(nrr_k, dims, dims)
-        ! Local Variables
-        REAL(KIND=dp) :: rdotk(nrr_k)
-        INTEGER:: ikk, ikq, iw, iw2, ir
-
-        cfac = czero
-        rdotk = czero
-
-        CALL dgemv('t', 3, nrr_k, twopi, irvec_r, 3, xk, 1, 0.0_dp, rdotk, 1 )
-        !
-        IF (use_ws) THEN
-            DO iw=1, dims
-                DO iw2=1, dims
-                    DO ir = 1, nrr_k
-                        IF (ndegen_k(ir,iw2,iw) > 0) THEN
-                            cfac(ir,iw2,iw)  = EXP( ci*rdotk(ir) ) / ndegen_k(ir,iw2,iw)
-                        ENDIF
-                    ENDDO
-                ENDDO
-            ENDDO
-        ELSE
-            cfac(:,1,1)   = EXP( ci*rdotk(:) ) / ndegen_k(:,1,1)
-        ENDIF
-    END SUBROUTINE
-SUBROUTINE ksstate_extract ( )
-    !-----------------------------------------------------------------------
-    USE kinds,         ONLY : dp
-    USE mp_global,     ONLY : my_pool_id, nproc_pool,    &
-        intra_pool_comm, &
-        inter_pool_comm, inter_image_comm, world_comm
-    USE wavefunctions,  ONLY: evc
-    USE io_global,     ONLY : stdout, ionode, meta_ionode_id
-    USE pwcom,         ONLY : nks, nkstot
-    USE elph2,         ONLY : ngk_all, igk_k_all, xqf, nqf, xkf, nkf, nkqtotf
-    USE cell_base,  ONLY : at, alat, celldm
-    USE ions_base,  ONLY : nat, ityp, atm, ntyp => nsp, tau
-    USE gvect,         ONLY : g, ngm
-    USE constants, ONLY :  pi
-    USE constants_epw, ONLY : twopi
-    USE mp,            ONLY : mp_barrier, mp_bcast, mp_sum
-    USE mp_bands,  ONLY : intra_bgrp_comm
-    USE wvfct,     ONLY : npwx
-    USE division,  ONLY : fkbounds
-    USE io_epw,           ONLY : readwfc
-    !
-    INTEGER :: ipooltmp, ik, iq, npw, lower_bnd, upper_bnd, ig, igp, igpp, nksqtotf
-    INTEGER :: ounit, nx,ny,nz, np, hh, sort_indice_cutoff, n1_dim, i, j, k
-    REAL(KIND=dp) :: cg2, temp1(3), temp2(3)
-    REAL(KIND=dp) :: e1(3), e2(3), e3(3), x0(3), m1, m2, m3
-    REAL(KIND=dp) :: deltax, deltay, deltaz
-    COMPLEX(KIND=dp), ALLOCATABLE :: carica (:,:,:)
-    INTEGER, ALLOCATABLE :: igk(:)
-    COMPLEX(KIND=dp), ALLOCATABLE :: eigx (:), eigy (:), eigz (:)
-    COMPLEX(KIND=dp), ALLOCATABLE :: ac(:), acp(:)
-
-END SUBROUTINE ksstate_extract
-
-!----------------------------------------------------------------------------
-END MODULE
-
-MODULE polaron_old
-    PUBLIC  :: wfc_elec_old
-CONTAINS
-    !-----------------------------------------------------------------------
-    SUBROUTINE wfc_elec_old ( nrr_k, nrr_q, nrr_g, irvec_q, irvec_g, ndegen_k, ndegen_q, ndegen_g, &
-            w2, uf, epmatwef, irvec_r, dims, dims2 )
-        !-----------------------------------------------------------------------
-        !
-        !  Compute the polaron envelop function and formation energy (Pekar's continuum model,
-        !  generalized Frohlich vertex )
-        !  DS,CV
-        !
-        !  Use effective mass, static dielectric permittivity and Born effective
-        !  charge as input variables
-        !
-        !  Use matrix elements, electronic eigenvalues and phonon frequencies
-        !  from ep-wannier interpolation
-        !
-        !  This subroutine computes the contribution from phonon iq to all k-points
-        !  The outer loop in ephwann_shuffle.f90 will loop over all iq points
-        !  The contribution from each iq is summed at the end of this subroutine for iq=nqtotf
-        !  to recover the per-ik electron wavefunction
-        !
-        !  Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-        !-----------------------------------------------------------------------
-        USE kinds,         ONLY : dp
-        use test_tools, only : para_write
-        USE io_global,     ONLY : stdout,ionode_id, meta_ionode_id
-        USE modes,         ONLY : nmodes
-        USE epwcom,        ONLY : nbndsub, shortrange, restart_polaron,&
-            fsthick, ngaussw, degaussw,spherical_cutoff,&
-            eps_acustic, efermi_read, fermi_energy, lscreen, &
-            model_vertex, nkf1, nkf2, nkf3, conv_thr_polaron, &
-            r01, r02, r03, num_cbands, start_mode, cb_shift, &
-            polaron_dos, polaron_type, &
-            electron_dos, phonon_dos, diag_mode, restart_polaron_mode
-        USE pwcom,         ONLY : ef !,nelec, isk
-        USE elph2,         ONLY : etf, ibndmin, ibndmax, nkqf, xqf, &
-            nkf, nqf,epf17, wkf, nqtotf, wf, wqf, xkf, nkqtotf, &
-            efnew, eps_rpa, g2_all, &
-            ac, hkk, ec, ekf, gq, n1_dim,hh,np, &
-            g2_4
-        USE constants_epw, ONLY : ryd2mev, one, ryd2ev, two, zero, pi, ci, twopi,eps6,&
-            czero, cone
-        USE mp,            ONLY : mp_barrier, mp_sum,mp_bcast
-        USE mp_global,     ONLY : inter_pool_comm
-        USE mp_world,      ONLY : mpime, world_comm
-        USE ions_base,     ONLY : nat, tau
-        USE start_k,       ONLY : nk1, nk2, nk3
-        USE cell_base,     ONLY : at, bg, alat, omega
-        USE parallel_include
-        USE division,  ONLY : fkbounds
-        USE ephblochkq, ONLY: gkq, phonon_eigvector
-        USE poolgathering, ONLY : poolgather2
-        !
-        IMPLICIT NONE
-        !
-        INTEGER, INTENT (IN) :: nrr_k, nrr_q, nrr_g ! Added for polaron calculations by Chao Lian.
-        REAL(KIND=dp), INTENT (INOUT) :: w2(3*nat)
-        INTEGER, INTENT (IN) :: irvec_q(:,:), irvec_g(:,:)
-        REAL(KIND=dp), ALLOCATABLE :: irvec_r(:,:)
-        INTEGER, INTENT (IN) :: ndegen_k(:,:,:), ndegen_q(:,:,:), ndegen_g(:,:,:)
-        COMPLEX(KIND=dp), INTENT (INOUT) :: uf ( nmodes, nmodes), epmatwef( nbndsub, nbndsub, nrr_k, nmodes)
-        INTEGER, INTENT (IN) :: dims
-        !! Dims is either nbndsub if use_ws or 1 if not
-        INTEGER, INTENT (IN) :: dims2
-        !! Dims is either nat if use_ws or 1 if not
-        !
-        INTEGER :: ik, ikk, ikq, ibnd, jbnd, imode, nrec, iq, fermicount, ir
-        COMPLEX(KIND=dp) :: cfac, weight, zdotu
-        REAL(KIND=dp) :: g2, ekk, ekq, wq, ef0, wgkq, inv_eptemp0, w0g1, w0g2, &
-            g2_tmp, inv_wq, inv_degaussw
-        REAL(KIND=dp), EXTERNAL :: wgauss, w0gauss
-        REAL(KIND=dp), PARAMETER :: eps2 = 2.d0/ryd2mev
-        !
-        ! variables for collecting data from all pools in parallel case
-        !
-        INTEGER :: nksqtotf, lower_bnd, upper_bnd
-        REAL(KIND=dp), ALLOCATABLE :: xkf_all(:,:), etf_all(:,:)
-        !
-        ! variables defined by DS
-        INTEGER :: delta_function,counter,i,j,k,ii,n3,n4,sort_indice_even ! define the size of supercell
-        INTEGER :: ncb, ikbnd, kbnd, delta, ikkk, sort_indice, band_pos
-        INTEGER :: grid_infor(9)
-        REAL(KIND=dp) :: diff_k(3) ,r0(3),a,c, omega_c, m_2, diff_kk(3),var, ecb0, tauu0(3)
-        REAL(KIND=dp), ALLOCATABLE :: k_grid(:,:)  ! k grids used for matrix
-        COMPLEX(KIND=dp), ALLOCATABLE :: z1(:), z2(:)
-        COMPLEX(KIND=dp), ALLOCATABLE :: mt(:,:), vt(:), tm(:),outt(:),lac(:,:)
-
-        !
-        INTEGER :: sort_indice_cutoff, m1,m2,m3, na, ipol, grid_nature, igamma
-        INTEGER :: lg(nqf)
-        INTEGER, ALLOCATABLE :: dk_list(:), dkk_list(:), mygrow_list(:)
-        INTEGER :: n1_dim_x,n1_dim_y,n1_dim_z,hhx,hhy,hhz,npx,npy,npz
-        INTEGER :: grid_nature_s(3)
-        !
-        COMPLEX(KIND=dp) :: phase_f,zz, dtauu(3), apa
-        COMPLEX(KIND=dp) :: tv(3),m(3,3), m_copy(3,3)!, g_frohlich0, g_frohlich, g_frohlich2 ! test variables, checking
-        COMPLEX(KIND=dp) :: am(2,2), eig(2), am_copy(2), gv(nmodes,nqf)
-        !
-        INTEGER :: ierr !
-        INTEGER :: n, nb  ! problem size and block size
-        INTEGER :: myarows, myacols, mygrows, mygcols ! size of local subset of global matrix
-        INTEGER :: myxrows, myxcols   ! size of local subset of global vector
-        INTEGER :: myi, myj, rows, loc_i, loc_j, max_row, max_col
-        INTEGER, ALLOCATABLE :: call_list(:), request_list(:,:)
-        COMPLEX(KIND=dp), DIMENSION(:,:), ALLOCATABLE :: mya,myx,myy, mymsg
-        COMPLEX(KIND=dp), DIMENSION(:,:,:), ALLOCATABLE :: myg
-
-
-        INTEGER :: tar_p, ib, ibp, i_n, i_m, i_ki, i_kj
-        INTEGER, EXTERNAL :: numroc   ! blacs routine
-        INTEGER :: me, procs, icontxt, prow, pcol, myrow, mycol  ! blacs data
-        INTEGER :: p_col_i, p_row_i, pme, blacs_pnum, icaller, tar_prow, tar_pcol
-        INTEGER :: p_row_i2, icaller2, tar_prow2, tar_pcol2, loc_i2, loc_j2
-        INTEGER :: info    ! scalapack return value
-        INTEGER, DIMENSION(9)   :: ides_a, ides_x, ides_y, ides_g ! matrix descriptors
-        INTEGER, DIMENSION(2) :: dimsl
-        CHARACTER*12 :: aclist, folder
-        CHARACTER(*), PARAMETER :: fileplace = "./grid/"
-        INTEGER :: ntot, mk
-        REAL(KIND=dp) :: ds, he, sigma, dos
-        REAL(KIND=dp) :: rp(3), qrp, qcart(3), e_formation2, e_formation1, wlo
-        !!!
-#if defined (__MPI)
-        COMPLEX(KIND=dp), ALLOCATABLE :: array(:,:,:), VAL(:,:,:), nval(:,:,:)
-        INTEGER (KIND = mpi_address_kind) :: SIZE,lowerbound, sizeofreal, disp_aint,&
-            disp_int
-        INTEGER :: win, tar
-#endif
-    END SUBROUTINE wfc_elec_old
-END MODULE
diff --git a/EPW/src/ephwann_shuffle.f90 b/EPW/src/ephwann_shuffle.f90
index 122394007..74c5309a1 100644
--- a/EPW/src/ephwann_shuffle.f90
+++ b/EPW/src/ephwann_shuffle.f90
@@ -118,18 +118,14 @@
   USE parallel_include, ONLY : MPI_MODE_RDONLY, MPI_INFO_NULL, MPI_OFFSET_KIND, &
                                MPI_OFFSET
 #endif
-  ! ---------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  USE epwcom,           ONLY : wfcelec, start_band, polaron_wf, restart_polaron,   &
-                               polaron_interpol, polaron_bq, polaron_dos, nPlrn,   &
-                               wfcelec_old
-  USE elph2,            ONLY : g2_4
-  USE ephblochkq,       ONLY : interpol_bq, interpol_a_k, compute_a_re
-  USE polaron,          ONLY : wfc_elec, epfall, ufall, Hamil, eigVec,             &
-                               interp_plrn_wf, interp_plrn_bq, plot_plrn_wf
-  USE polaron_old,      ONLY : wfc_elec_old
-  ! --------------------------------------------------------------------------------
+  !!!!!
+  ! Added for polaron calculations.
+  USE epwcom,         ONLY : plrn, time_rev_U_plrn, g_start_band_plrn, g_end_band_plrn
+  USE epwcom,         ONLY : scell_mat_plrn
+  USE polaron,        ONLY : plrn_flow_select, plrn_prepare, plrn_save_g_to_file
+  USE polaron,        ONLY : is_mirror_q, is_mirror_k
+  USE polaron,        ONLY : kpg_map, ikq_all
+  !!!!!
   !
   IMPLICIT NONE
   !
@@ -338,6 +334,9 @@
   COMPLEX(KIND = DP), ALLOCATABLE :: eimpmatf(:, :)
   !! carrier-ionized impurity matrix in smooth Bloch basis
   !!!!!
+  LOGICAL :: mirror_k, mirror_q, mirror_kpq, ik_global
+  REAL(KIND = DP) :: xxq_r(3)
+  !!!!!
   !
   CALL start_clock('ephwann')
   !
@@ -975,8 +974,8 @@
     !
     totq = 0
     !
-    IF (wfcelec) THEN
-      !
+    IF (plrn .OR. scell_mat_plrn) THEN
+      ! For polaron calculations, all the q points have to be included
       totq = nqf
       ALLOCATE(selecq(nqf), STAT = ierr)
       IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating selecq', 1)
@@ -984,7 +983,7 @@
         selecq(iq) = iq
       ENDDO
       !
-    ELSE ! wfcelec
+    ELSE
       ! Check if the file has been pre-computed
       IF (mpime == ionode_id) THEN
         INQUIRE(FILE = 'selecq.fmt', EXIST = exst)
@@ -1012,7 +1011,7 @@
       WRITE(stdout, '(5x,a,i8,a)')'We only need to compute ', totq, ' q-points'
       WRITE(stdout, '(5x,a)')' '
       !
-    ENDIF ! wfcelec
+    END IF ! plrn
     !
     ! -----------------------------------------------------------------------
     ! Possible restart during step 1)
@@ -1108,38 +1107,8 @@
       IF (ierr /= 0) CALL errore('ephwann_shiffle', 'Error allocating epsilon2_abs_lorenz_all', 1)
     ENDIF ! indabs
     !
-    ! --------------------------------------------------------------------------------------
-    ! Polaron shell implementation for future use
-    IF (wfcelec) then
-      IF (polaron_interpol) THEN
-        ALLOCATE(eigVec(nktotf * nbndfst, nplrn), STAT = ierr)
-        IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating eigVec', 1)
-        eigVec = czero
-        CALL interp_plrn_wf(nrr_k, ndegen_k, irvec_r, dims)
-        iq_restart = totq ! Skip the calculation of e-ph element, save the time.
-        DEALLOCATE(eigVec)
-      ELSEIF(polaron_bq) THEN
-        CALL interp_plrn_bq(nrr_q, ndegen_q, irvec_q)
-        iq_restart = totq ! Skip the calculation of e-ph element, save the time.
-      ELSEIF(polaron_wf) THEN
-        CALL plot_plrn_wf()
-        iq_restart = totq
-      ELSE
-        ALLOCATE(eigVec(nktotf * nbndfst, nplrn), STAT = ierr)
-        IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating eigVec', 1)
-        eigVec = czero
-        ALLOCATE(epfall(nbndfst, nbndfst, nmodes, nkf, nqtotf), STAT = ierr)
-        IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating epfall', 1)
-        epfall = czero
-        ALLOCATE(ufall(nmodes, nmodes, nqtotf), STAT = ierr)
-        IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating ufall', 1)
-        ufall = czero
-        ALLOCATE(Hamil(nkf * nbndfst, nktotf * nbndfst), STAT = ierr)
-        IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating Hamil', 1)
-        Hamil = czero
-      ENDIF
-    ENDIF
-    ! -------------------------------------------------------------------------------------
+    ! Polaron calculations: iq_restart may be set to totq in restart/interpolation mode.
+    IF (plrn) call plrn_prepare(totq, iq_restart)
     !
     ! Restart in SERTA case or self-energy (electron or plasmon) case
     IF (restart) THEN
@@ -1301,6 +1270,23 @@
       ELSE
         xxq = xqf(:, iq)
       ENDIF
+      !!!!!!!
+      ! Added by Chao Lian for enforcing the time-rev symmetry of e_q
+      ! for polaron calculations, xxq_r is coordinates of the mirror point of xxq
+      ! for other calculations, xxq_r is xxq.
+      IF (plrn) THEN
+         IF (is_mirror_q (iq)) THEN
+            xxq_r = xqf(:, kpg_map(iq))
+            mirror_q = .true.
+         ELSE
+            xxq_r = xxq
+            mirror_q = .false.
+         END IF
+      ELSE
+         xxq_r = xxq
+         mirror_q = .false.
+      end if
+      !!!!!!!
       ! Temporarily commented by H. Lee
 !      CALL find_gmin(xxq)
       !
@@ -1309,9 +1295,16 @@
       ! ------------------------------------------------------
       !
       IF (.NOT. lifc) THEN
-        CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, uf, w2)
+        !!!!!
+        ! CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, uf, w2)
+        CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq_r, uf, w2, mirror_q)
+        !!!!!
       ELSE
-        CALL dynifc2blochf(nmodes, rws, nrws, xxq, uf, w2)
+        !!!!!
+        !TODO: apply degeneracy lift in dynifc2blochf
+        ! CALL dynifc2blochf(nmodes, rws, nrws, xxq, uf, w2)
+        CALL dynifc2blochf(nmodes, rws, nrws, xxq_r, uf, w2, mirror_q)
+        !!!!!
       ENDIF
       !
       ! ...then take into account the mass factors and square-root the frequencies...
@@ -1367,6 +1360,23 @@
         !
         xkk = xkf(:, ikk)
         xkq2 = xkk + xxq
+	      !
+        IF (plrn .and. time_rev_U_plrn) THEN
+            mirror_k = is_mirror_k(ik)
+        ELSE
+            mirror_k = .false.
+        END IF
+        ! note that ikq_all return the global index of k+q
+        ! Since ikq2 is global index, we need is_mirror_q instead of is_mirror_k
+        ! is_mirror_q uses global k/q index.
+        ! this is different from is_mirror_k which needs local index k
+        IF (plrn .and. time_rev_U_plrn) THEN
+            ! kpg_map return global index, thus xkf_all is needed
+            mirror_kpq = is_mirror_q(ikq_all(ik, iq))
+        ELSE
+            mirror_kpq = .false.
+        END IF
+        !!!!!
         !
         CALL DGEMV('t', 3, nrr_k, twopi, irvec_r, 3, xkk, 1, 0.0_DP, rdotk, 1)
         CALL DGEMV('t', 3, nrr_k, twopi, irvec_r, 3, xkq2, 1, 0.0_DP, rdotk2, 1)
@@ -1393,12 +1403,22 @@
         !
         ! Kohn-Sham first, then get the rotation matricies for following interp.
         IF (eig_read) THEN
-          CALL hamwan2bloch(nbndsub, nrr_k, cufkk, etf_ks(:, ikk), chw_ks, cfac, dims)
-          CALL hamwan2bloch(nbndsub, nrr_k, cufkq, etf_ks(:, ikq), chw_ks, cfacq, dims)
+          !!!!!
+          !CALL hamwan2bloch(nbndsub, nrr_k, cufkk, etf_ks(:, ikk), chw_ks, cfac, dims)
+          !CALL hamwan2bloch(nbndsub, nrr_k, cufkq, etf_ks(:, ikq), chw_ks, cfacq, dims)
+          CALL hamwan2bloch(nbndsub, nrr_k, cufkk, etf_ks(:, ikk), chw_ks, cfac, dims, mirror_k)
+          CALL hamwan2bloch(nbndsub, nrr_k, cufkq, etf_ks(:, ikq), chw_ks, cfacq, dims, mirror_kpq)
+          !!!!!
         ENDIF
         !
-        CALL hamwan2bloch(nbndsub, nrr_k, cufkk, etf(:, ikk), chw, cfac, dims)
-        CALL hamwan2bloch(nbndsub, nrr_k, cufkq, etf(:, ikq), chw, cfacq, dims)
+        !!!!!
+        !
+        !CALL hamwan2bloch(nbndsub, nrr_k, cufkk, etf(:, ikk), chw, cfac, dims)
+        !CALL hamwan2bloch(nbndsub, nrr_k, cufkq, etf(:, ikq), chw, cfacq, dims)
+        CALL hamwan2bloch(nbndsub, nrr_k, cufkk, etf(:, ikk), chw, cfac, dims, mirror_k)
+        CALL hamwan2bloch(nbndsub, nrr_k, cufkq, etf(:, ikq), chw, cfacq, dims, mirror_kpq)
+        !!!!!
+        ! Save U^{\dagger}_{mn}(k) matrix to file
         !
         ! Apply a possible scissor shift
         etf(icbm:nbndsub, ikk) = etf(icbm:nbndsub, ikk) + scissor
@@ -1437,7 +1457,7 @@
           ! within a Fermi shell of size fsthick
           !
           IF (((MINVAL(ABS(etf(:, ikk) - ef)) < fsthick) .AND. &
-              (MINVAL(ABS(etf(:, ikq) - ef)) < fsthick)) .OR. wfcelec) THEN
+              (MINVAL(ABS(etf(:, ikq) - ef)) < fsthick))) THEN
             !
             ! Compute velocities
             !
@@ -1581,16 +1601,10 @@
        WRITE(stdout, '(7x, a, f12.6, a)' ) 'Adaptative smearing el-impurity = Min: ', DSQRT(2.d0) * MINVAL(valmin) * ryd2mev,' meV'
        WRITE(stdout, '(7x, a, f12.6, a)' ) '                                  Max: ', DSQRT(2.d0) * MAXVAL(valmax) * ryd2mev,' meV'
       ENDIF
-      !
-      ! --------------------------------------------------------------------------------------------------
-      ! Added by Chao Lian for polaron calculations
-      ! Shell implementation for future use.
-      IF (wfcelec .AND. (.NOT. polaron_bq) .AND. (.NOT. polaron_interpol)) THEN
-        ufall(1:nmodes, 1:nmodes, iq) = uf(1:nmodes, 1:nmodes)
-        epfall(1:nbndfst, 1:nbndfst, 1:nmodes, 1:nkf, iq) = epf17(1:nbndfst, 1:nbndfst, 1:nmodes, 1:nkf)
-      ENDIF
-      ! --------------------------------------------------------------------------------------------------
-      !
+      !!!!!
+      ! Added by Chao Lian to save the el-ph matrix element to files
+      IF (plrn) CALL plrn_save_g_to_file(iq, epf17, wf)
+      !!!!!
       IF (prtgkk    ) CALL print_gkk(iq)
       IF (phonselfen) CALL selfen_phon_q(iqq, iq, totq)
       IF (elecselfen) CALL selfen_elec_q(iqq, iq, totq, first_cycle)
@@ -1745,64 +1759,10 @@
       ENDIF ! scatread
     ENDDO  ! end loop over q points
     !
-    ! --------------------------------------------------------------------------------
-    ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-    ! Shell implementation for future use.
-    IF (wfcelec .AND. (.NOT. polaron_bq) .AND. (.NOT. polaron_interpol)) THEN
-      IF (wfcelec_old) then
-        ALLOCATE(g2_4(ibndmax - ibndmin + 1, ibndmax - ibndmin + 1, nmodes, nkqtotf / 2), STAT = ierr)
-        IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error allocating g2_4', 1)
-        g2_4(:, :, :, :) = czero
-        CALL wfc_elec_old(nrr_k, nrr_q, nrr_g, irvec_q, irvec_g, &
-           ndegen_k, ndegen_q, ndegen_g, w2, uf, epmatwef, irvec_r, &
-           dims, dims2)
-      ELSE
-        CALL wfc_elec(nrr_k, ndegen_k, irvec_r, dims)
-      ENDIF
-      IF (polaron_wf) THEN       ! calculating A(Re) from Ac.txt
-        CALL compute_a_re (iq, nrr_k, ndegen_k, irvec_r, dims)
-        RETURN
-      ENDIF
-      IF (polaron_interpol) THEN   ! interpolate Ak from ar.txt ( A(Re))
-        CALL interpol_a_k(iq, nrr_k, ndegen_k, irvec_r, dims)
-        return
-      ENDIF
-      DO iq = iq_restart, nqf
-        xxq = xqf(:, iq)
-        IF (.NOT. lifc) THEN
-          CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, uf, w2)
-        ELSE
-          CALL dynifc2blochf(nmodes, rws, nrws, xxq, uf, w2)
-        ENDIF
-        !
-        DO nu = 1, nmodes
-          !
-          ! wf are the interpolated eigenfrequencies (omega on fine grid)
-          IF (w2(nu) > 0.d0) THEN
-            wf(nu, iq) =  SQRT(ABS(w2(nu)))
-          ELSE
-            wf(nu, iq) = -SQRT(ABS(w2(nu)))
-          ENDIF
-        ENDDO
-      ENDDO
-      !
-      IF (polaron_bq) THEN   ! interpolate bq from both A(Re) and Ac(k)
-        DO iq = 1, nqf
-          CALL interpol_bq(iq, nrr_k, nrr_q, nrr_g, irvec_q, irvec_g, ndegen_k, ndegen_q, ndegen_g, &
-             w2, uf, epmatwef, irvec_r, dims, dims2)
-        ENDDO
-        RETURN
-      ENDIF
-      DEALLOCATE(g2_4, STAT = ierr)
-      IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error deallocating g2_4', 1)
-      DEALLOCATE(epfall, STAT = ierr)
-      IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error deallocating epfall', 1)
-      DEALLOCATE(Hamil, STAT = ierr)
-      IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error deallocating Hamil', 1)
-      DEALLOCATE(eigVec, STAT = ierr)
-      IF (ierr /= 0) CALL errore('ephwann_shuffle', 'Error deallocating eigVec', 1)
-    ENDIF
-    ! End Polaron Code
+!!!!!
+    ! Added for polaron calculations. Originally by Danny Sio, rewritten by Chao Lian.
+    IF (plrn) CALL plrn_flow_select(nrr_k, ndegen_k, irvec_r, nrr_q, ndegen_q, irvec_q, rws, nrws, dims)
+!!!!!
     ! --------------------------------------------------------------------------------
     !
     ! Check Memory usage
diff --git a/EPW/src/epw_init.f90 b/EPW/src/epw_init.f90
index 499575557..79d55e6f5 100644
--- a/EPW/src/epw_init.f90
+++ b/EPW/src/epw_init.f90
@@ -57,7 +57,6 @@
   ! --------------------------------------------------------------------------------
   ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
   ! Shell implementation for future use.
-  USE epwcom,           ONLY : polaron_wf
   USE grid,             ONLY : loadqmesh_serial, loadkmesh_para
   ! --------------------------------------------------------------------------------
   !
@@ -251,16 +250,6 @@
     CALL dvanqq2()
   ENDIF
   !
-  ! ------------------------------------------------------------------------------- 
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use. 
-  ! IF (polaron_wf) then
-  !   CALL loadqmesh_serial
-  !    CALL loadkmesh_para
-  !    CALL KSstate_extract()
-  !   STOP
-  ! ENDIF
-  ! -------------------------------------------------------------------------------
   !
   CALL stop_clock('epw_init')
   !
diff --git a/EPW/src/epw_readin.f90 b/EPW/src/epw_readin.f90
index 836af21c8..eefc2eacd 100644
--- a/EPW/src/epw_readin.f90
+++ b/EPW/src/epw_readin.f90
@@ -93,20 +93,23 @@
   USE paw_variables, ONLY : okpaw
   USE io_epw,        ONLY : param_get_range_vector
   USE open_close_input_file, ONLY : open_input_file, close_input_file
-  !
-  ! ---------------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  USE epwcom,        ONLY : wfcelec, restart_polaron, spherical_cutoff,                &
-                            model_vertex, conv_thr_polaron, n_dop,                     &
-                            polaron_wf, r01, r02, r03, num_cbands, start_band,         &
-                            start_mode, cb_shift, polaron_interpol, polaron_bq,        &
-                            polaron_dos, electron_dos, phonon_dos, diag_mode,          &
-                            restart_polaron_mode, polaron_type, nPlrn, wfcelec_old,    &
-                            sigma_plrn, ethr_Plrn, full_diagon_plrn, mixing_Plrn,      &
-                            init_plrn_wf, niterPlrn, nDOS_plrn, emax_plrn, emin_plrn,  &
-                            sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn
-!!!!!
+  ! Added for polaron calculations by Chao Lian
+  USE epwcom,        ONLY : plrn, restart_plrn, conv_thr_plrn, end_band_plrn,         &
+                            cal_psir_plrn, start_band_plrn,  type_plrn, nstate_plrn,  &
+                            interp_Ank_plrn, interp_Bqu_plrn,                         &
+                            init_sigma_plrn, init_k0_plrn,                            &
+                            full_diagon_plrn, mixing_Plrn, init_plrn, niter_plrn,     &
+                            nDOS_plrn, edos_max_plrn, edos_min_plrn, edos_sigma_plrn, &
+                            pdos_sigma_plrn, pdos_max_plrn, pdos_min_plrn,            &
+                            seed_plrn, ethrdg_plrn, time_rev_A_plrn, nhblock_plrn,    &
+                            beta_plrn, Mmn_plrn, recal_Mmn_plrn, r0_plrn, debug_plrn, &
+                            time_rev_U_plrn,  g_start_band_plrn, g_end_band_plrn,     &
+                            g_start_energy_plrn, g_end_energy_plrn, lrot,             &
+                            model_vertex_plrn, model_enband_plrn, model_phfreq_plrn,  &
+                            kappa_plrn, omega_LO_plrn, m_eff_plrn, step_wf_grid_plrn, &
+                            g_power_order_plrn, g_tol_plrn, io_lvl_plrn,              &
+                            scell_mat_plrn, scell_mat, init_ntau_plrn,                &
+                            adapt_ethrdg_plrn, init_ethrdg_plrn, nethrdg_plrn
   !-------------------------------------------------------------------------------------
   ! SH: Added for tc linearized equation, sparce sampling, and full-bandwidth calculations
   USE epwcom,        ONLY : gridsamp, griddens, tc_linear, tc_linear_solver, fbw,      &
@@ -201,16 +204,21 @@
        !!!!!
   !---------------------------------------------------------------------------------
   ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-       wfcelec, restart_polaron, spherical_cutoff, model_vertex, start_mode,   &
-       conv_thr_polaron, polaron_wf, r01, r02, r03, num_cbands, start_band,    &
-       cb_shift, polaron_interpol, polaron_bq, polaron_dos, electron_dos ,     &
-       phonon_dos, diag_mode, restart_polaron_mode, polaron_type,              &
-       niterPlrn, wfcelec_old, sigma_plrn, ethr_Plrn, full_diagon_plrn,        &
-       mixing_Plrn, init_plrn_wf, nPlrn, nDOS_plrn, emax_plrn, emin_plrn,      &
-       !!!!!
-       ! sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn
-       sigma_edos_plrn, sigma_pdos_plrn, pmax_plrn, pmin_plrn,                 &
+       plrn, restart_plrn, conv_thr_plrn, end_band_plrn, lrot,                 &
+       cal_psir_plrn, start_band_plrn,  type_plrn, nstate_plrn,                &
+       interp_Ank_plrn, interp_Bqu_plrn, init_sigma_plrn, init_k0_plrn,        &
+       full_diagon_plrn, mixing_Plrn, init_plrn, niter_plrn,                   &
+       nDOS_plrn, edos_max_plrn, edos_min_plrn, edos_sigma_plrn,               &
+       pdos_sigma_plrn, pdos_max_plrn, pdos_min_plrn,                          &
+       seed_plrn, ethrdg_plrn, time_rev_A_plrn, nhblock_plrn,                  &
+       beta_plrn, Mmn_plrn, recal_Mmn_plrn, r0_plrn, debug_plrn,               &
+       time_rev_U_plrn,  g_start_band_plrn, g_end_band_plrn,                   &
+       g_start_energy_plrn, g_end_energy_plrn,                                 &
+       model_vertex_plrn, model_enband_plrn, model_phfreq_plrn,                &
+       kappa_plrn, omega_LO_plrn, m_eff_plrn, step_wf_grid_plrn,               &
+       g_power_order_plrn, g_tol_plrn, io_lvl_plrn,                            &
+       scell_mat_plrn, scell_mat, init_ntau_plrn,                              &
+       adapt_ethrdg_plrn, init_ethrdg_plrn, nethrdg_plrn,                      &
   !---------------------------------------------------------------------------------
   ! SH: Added for tc linearized equation, sparce sampling, and full-bandwidth runs
        tc_linear, tc_linear_solver, gridsamp, griddens, fbw, dos_del, muchem
@@ -629,6 +637,7 @@
   meff         = 1.d0
   epsiheg      = 1.d0
   lphase       = .FALSE.
+  lrot         = .FALSE.
   omegamin     = 0.d0  ! eV
   omegamax     = 10.d0 ! eV
   omegastep    = 1.d0  ! eV
@@ -663,47 +672,63 @@
   ii_eps0 = 0.0d0
   !!!!!
   !
-  ! --------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  nPlrn = 1
-  niterPlrn = 50
-  n_dop = 0.d0
-  smear_rpa = 1.d0
-  wfcelec = .false.
-  wfcelec_old = .false.
-  restart_polaron = .false.
-  spherical_cutoff = 100.d0
-  model_vertex = .false.
-  conv_thr_polaron = 1E-5
-  polaron_wf       = .false.
-  polaron_interpol = .false.
-  polaron_bq       = .false.
-  polaron_dos      = .false.
-  r01        = 0.d0
-  r02        = 0.d0
-  r03        = 0.d0
-  num_cbands = 1 !2
-  start_band = 4 !11
-  start_mode = 1 !1
-  cb_shift   = 0 !0
-  electron_dos = .false.
-  phonon_dos = .false.
+  ! Added for polaron calculations by Chao Lian
+  nstate_plrn = 1
+  niter_plrn = 50
+  plrn = .false.
+  restart_plrn = .false.
+  model_vertex_plrn = .false.
+  model_enband_plrn = .false.
+  model_phfreq_plrn = .false.
+  kappa_plrn = 0.0
+  omega_LO_plrn = 0.0
+  m_eff_plrn = 0.0
+  conv_thr_plrn = 1E-5
+  g_power_order_plrn = 1
+  step_wf_grid_plrn = 1
+  cal_psir_plrn       = .false.
+  interp_Ank_plrn     = .false.
+  interp_Bqu_plrn     = .false.
+
+  start_band_plrn = 0
+  end_band_plrn = 0
+  g_start_band_plrn = 0
+  g_end_band_plrn = 0
+  g_start_energy_plrn = -10.0
+  g_end_energy_plrn = 10.0
+
   full_diagon_plrn = .false.
   mixing_Plrn = 1.0
-  diag_mode = 1
-  init_plrn_wf = 2
+  init_plrn = 1
+  Mmn_plrn = .false.
+  recal_Mmn_plrn = .false.
+  debug_plrn = .false.
+  r0_plrn = zero
   nDOS_plrn = 1000
-  emin_plrn = zero
-  pmin_plrn = zero
-  emax_plrn = 1.d0
-  pmax_plrn = 1d-2
-  sigma_edos_plrn = 0.1d0
-  sigma_pdos_plrn = 1d-3
-  restart_polaron_mode = 1
-  polaron_type = -1
-  sigma_plrn  = 4.6
-  ethr_Plrn = 1E-3
+  edos_min_plrn = zero ! eV
+  pdos_min_plrn = zero ! meV
+  edos_max_plrn = zero ! eV
+  pdos_max_plrn = zero ! meV
+  edos_sigma_plrn = 0.01d0 ! eV
+  pdos_sigma_plrn = 0.1 ! meV
+  type_plrn = -1
+  init_sigma_plrn  = 4.6
+  init_k0_plrn = (/1000.d0, 1000.d0, 1000.d0/)
+  ethrdg_plrn = 1E-6
+  time_rev_A_plrn = .false.
+  time_rev_U_plrn = .false.
+  nhblock_plrn = 1
+  beta_plrn = 0.0
+  g_tol_plrn = -0.01
+  io_lvl_plrn = 0 
+  scell_mat_plrn = .false.
+  scell_mat(1, 1:3) = (/1, 0, 0/)
+  scell_mat(2, 1:3) = (/0, 1, 0/)
+  scell_mat(3, 1:3) = (/0, 0, 1/)
+  init_ntau_plrn = 1
+  adapt_ethrdg_plrn = .false.
+  init_ethrdg_plrn = 1.d-2
+  nethrdg_plrn = 11
   ! ---------------------------------------------------------------------------------
   !
   ! Reading the namelist inputepw and check
@@ -1180,26 +1205,13 @@
   CALL mp_bcast(nk2, meta_ionode_id, world_comm)
   CALL mp_bcast(nk3, meta_ionode_id, world_comm)
   !
-  ! ---------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  CALL mp_bcast(nPlrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(niterPlrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(spherical_cutoff, meta_ionode_id, world_comm)
-  CALL mp_bcast(conv_thr_polaron, meta_ionode_id, world_comm)
-  CALL mp_bcast(restart_polaron, meta_ionode_id, world_comm)
-  CALL mp_bcast(polaron_type, meta_ionode_id, world_comm)
-  CALL mp_bcast(sigma_plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(full_diagon_plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(mixing_Plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(ethr_Plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(init_plrn_wf, meta_ionode_id, world_comm)
-  CALL mp_bcast(sigma_edos_plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(sigma_pdos_plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(pmax_plrn, meta_ionode_id, world_comm)
-  CALL mp_bcast(pmin_plrn, meta_ionode_id, world_comm)
-  ! ---------------------------------------------------------------------------------
-  !
+  CALL mp_bcast(g_tol_plrn, meta_ionode_id, world_comm)
+  CALL mp_bcast(io_lvl_plrn, meta_ionode_id, world_comm)
+  CALL mp_bcast(init_ntau_plrn, meta_ionode_id, world_comm)
+  CALL mp_bcast(init_k0_plrn, meta_ionode_id, world_comm)
+  CALL mp_bcast(adapt_ethrdg_plrn, meta_ionode_id, world_comm)
+  CALL mp_bcast(init_ethrdg_plrn, meta_ionode_id, world_comm)
+  CALL mp_bcast(nethrdg_plrn, meta_ionode_id, world_comm)
   amass = AMU_RY * amass
   !
   !-----------------------------------------------------------------------
diff --git a/EPW/src/epw_setup.f90 b/EPW/src/epw_setup.f90
index 813745a48..c3417ba6d 100644
--- a/EPW/src/epw_setup.f90
+++ b/EPW/src/epw_setup.f90
@@ -43,11 +43,6 @@
   USE fft_base,      ONLY : dfftp
   USE gvecs,         ONLY : doublegrid
   USE noncollin_module, ONLY : noncolin, domag, m_loc, angle1, angle2, ux, nspin_mag
-  ! ---------------------------------------------------------------------------------
-  ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
-  ! Shell implementation for future use.
-  USE epwcom,        ONLY: polaron_wf
-  ! ---------------------------------------------------------------------------------
   !
   IMPLICIT NONE
   !
@@ -68,20 +63,6 @@
   !
   CALL start_clock('epw_setup')
   !
-  IF (.NOT. polaron_wf)  THEN
-    DO jk = 1, nkstot
-      xx_c = xk_cryst(1, jk) * nkc1
-      yy_c = xk_cryst(2, jk) * nkc2
-      zz_c = xk_cryst(3, jk) * nkc3
-      !
-      ! check that the k-mesh was defined in the positive region of 1st BZ
-      !
-      IF (xx_c < -eps5 .OR. yy_c < -eps5 .OR. zz_c < -eps5) &
-        CALL errore('epw_setup', 'coarse k-mesh needs to be strictly positive in 1st BZ', 1)
-      !
-    ENDDO
-  ENDIF ! not polaron_wf
-  !
   ! 1) Computes the total local potential (external+scf) on the smooth grid
   !
   CALL set_vrs(vrs, vltot, v%of_r, kedtau, v%kin_r, dfftp%nnr, nspin, doublegrid)
diff --git a/EPW/src/epwcom.f90 b/EPW/src/epwcom.f90
index 01627f95b..2e23f3f48 100644
--- a/EPW/src/epwcom.f90
+++ b/EPW/src/epwcom.f90
@@ -118,6 +118,8 @@
   !! if .TRUE. print the |g| vertex in [meV].
   LOGICAL :: lphase
   !! if .TRUE. fix the gauge when diagonalizing the interpolated dynamical matrix and electronic Hamiltonian.
+  LOGICAL :: lrot
+  !! if .TRUE. fix the gauge when diagonalizing the interpolated dynamical matrix and electronic Hamiltonian.
   LOGICAL :: lindabs
   !! if .TRUE. perform phonon-assisted absorption calculations
   LOGICAL :: use_ws
@@ -422,68 +424,105 @@
   ! ----------------------------------------------------------------------------------
   ! Added for polaron calculations. Originally by Danny Sio, modified by Chao Lian.
   ! Shell implementation for future use.
-  INTEGER :: num_cbands
-  !! number of conduction bands accounted in the Hilbert space of polaron Hamilontian
-  INTEGER :: start_band
-  !! start band index in matrix element
-  INTEGER :: nPlrn
-  !! Number of polaron bands
-  INTEGER :: nDOS_plrn
+  INTEGER :: start_band_plrn, end_band_plrn
+  !! Start and end band index in matrix element
+  INTEGER :: nstate_plrn
+  !! Number of polaron states calculated
+  INTEGER :: ndos_plrn
   !! Number of grid in polaron DOS calculation
-  INTEGER :: start_mode
-  !! start mode index
-  INTEGER :: cb_shift
-  !! CB shifted in polaron calculation
-  INTEGER :: diag_mode
-  !! diagonalization mode for polaron solver
-  INTEGER :: restart_polaron_mode
-  !! polaron restart mode
-  INTEGER :: polaron_type
+  INTEGER :: type_plrn
   !! polaron type (electron/hole)
-  INTEGER :: init_plrn_wf
+  INTEGER :: init_plrn
   !! initial polaron wavefuntion with 1:Gaussian package and 2:Random number
-  INTEGER :: niterPlrn
+  INTEGER :: niter_plrn
   !! Maximum number of polaron SCF loops.
-  REAL(KIND = DP) :: spherical_cutoff
-  !!  spherical_cutoff for fast convergence in polaron calculation
-  REAL(KIND = DP) :: conv_thr_polaron
+  INTEGER :: seed_plrn
+  !! The seed number to generate the random initial polaron wavefunction
+  INTEGER :: nhblock_plrn
+  !! The seed number to generate the random initial polaron wavefunction
+  INTEGER :: g_start_band_plrn, g_end_band_plrn
+  !! Start and end band in saving g matrix
+  INTEGER :: step_wf_grid_plrn
+  !! number of grid to skip in output of real space wavefunction
+  INTEGER :: io_lvl_plrn
+  !! number of grid to skip in output of real space wavefunction
+  INTEGER :: scell_mat(3,3)
+  !! Supercell transformation matrix for polaron
+  INTEGER :: init_ntau_plrn
+  !! Number of displacements to be read from file to calculate polaron energy landscape  
+  INTEGER :: nethrdg_plrn
+  !! Number of steps in the diagonalization if adaptive_ethrdg_plrn=.true.
+  REAL(KIND = DP) :: conv_thr_plrn
   !!  convergent threshold for polaron calculation
-  REAL(KIND = DP) :: r01, r02, r03
-  !!  x,y,z Carsteian coordinate of polaron centre
-  REAL(KIND = DP) :: emin_plrn, emax_plrn, sigma_edos_plrn
+  REAL(KIND = DP) :: edos_min_plrn, edos_max_plrn, edos_sigma_plrn
   !! Electron Energy range in polaron DOS calculation
-  REAL(KIND = DP) :: pmin_plrn, pmax_plrn, sigma_pdos_plrn
+  REAL(KIND = DP) :: pdos_min_plrn, pdos_max_plrn, pdos_sigma_plrn
   !! Phonon Energy range in polaron DOS calculation
-  REAL(KIND = DP) :: n_dop
-  !! extra added charge per cell (as tot_charge, with opposite sign)
-  REAL(KIND = DP) :: sigma_plrn
-  !! decay radius of polaron wavefunction in initialization
-  REAL(KIND = DP) :: ethr_Plrn
-  !! decay radius of polaron wavefunction in initialization
-  REAL(KIND = DP) :: mixing_Plrn
+  REAL(KIND = DP) :: init_sigma_plrn
+  !! decay radius of polaron wavefunction in Gaussian initialization
+  REAL(KIND = DP) :: init_k0_plrn(3)
+  !! center k of Gaussian initialization
+  REAL(KIND = DP) :: mixing_plrn
+  !! mixing weight in plrn iteration
+  REAL(KIND = DP) :: ethrdg_plrn
+  !! threshold in diagonalization of the polaron Hamiltonian
+  REAL(KIND = DP) :: beta_plrn(3)
   !! Mixing weight in Self-consistency
-  LOGICAL :: wfcelec
+  REAL(KIND = DP) :: r0_plrn(3)
+  !! Mixing weight in Self-consistency
+  REAL(KIND = DP) :: g_start_energy_plrn
+  !! Mixing weight in Self-consistency
+  REAL(KIND = DP) :: g_end_energy_plrn
+  !! Mixing weight in Self-consistency
+  REAL(KIND = DP) :: kappa_plrn
+  !! Mixing weight in Self-consistency
+  REAL(KIND = DP) :: g_power_order_plrn
+  !! kappa
+  REAL(KIND = DP) :: omega_LO_plrn
+  !! Higher optical phonon frequency
+  REAL(KIND = DP) :: m_eff_plrn
+  !! effective mass in LP model
+  REAL(KIND = DP) :: g_tol_plrn
+  !!
+  REAL(KIND = DP) :: as(3,3)
+  !! Supercell lattice vectors transformed by Smat, as=S*at
+  REAL(KIND = DP) :: bs(3,3)
+  !! Supercell reciprocal lattice vectors transformed by Sbar=transpose(inverse(Smat), bs=Sbar*bg  
+  REAL(KIND = DP) :: init_ethrdg_plrn
+  !! Initial coarse threshold to be used if adaptive_ethrdg_plrn=.true. 
+  !!
+  LOGICAL :: plrn
   !! if .true. calculates perturbated part of the wavefunction
-  LOGICAL :: restart_polaron
+  LOGICAL :: model_vertex_plrn
+  !! if .true. calculates vertex model
+  LOGICAL :: model_enband_plrn
+  !! if .true. calculates vertex model
+  LOGICAL :: model_phfreq_plrn
+  !! if .true. calculates vertex model
+  LOGICAL :: restart_plrn
   !! if .true. Using Ack from written outputs
-  LOGICAL :: model_vertex
-  !! if .true. Using model vertex and effective mass for polaron calculation
-  LOGICAL :: wfcelec_old
-  !! if .true. Using old algorithm by DS
   LOGICAL :: full_diagon_plrn
-   !! if .true. diagonalizing the polaron Hamiltonian with direct diagonalization
-  LOGICAL :: polaron_wf
+  !! if .true. diagonalizing the polaron Hamiltonian with direct diagonalization
+  LOGICAL :: cal_psir_plrn
   !! if .true. Generating a 3D-plot for polaron wavefunction
-  LOGICAL :: polaron_interpol
+  LOGICAL :: interp_Ank_plrn
   !! if .true. interpolating polaron A(k) from A(Re)
-  LOGICAL :: polaron_bq
+  LOGICAL :: interp_Bqu_plrn
   !! if .true. interpolating polaron phonon component bq from A(Re) and An(k)
-  LOGICAL :: polaron_dos
-  !! if .true. calculating polaron dos
-  LOGICAL :: electron_dos
-  !! if .true. calculating contributed electron dos
-  LOGICAL :: phonon_dos
-  !! if .true. calculating excited phonon dos
+  LOGICAL :: debug_plrn
+  !! if .true. interpolating polaron phonon component bq from A(Re) and An(k)
+  LOGICAL :: time_rev_A_plrn
+  !! if .true. time_rev_A_plrn
+  LOGICAL :: time_rev_U_plrn
+  !! if .true. time_rev_A_plrn
+  LOGICAL :: Mmn_plrn
+  !! if .true. time_rev_A_plrn
+  LOGICAL :: recal_Mmn_plrn
+  !! if .true. time_rev_A_plrn
+  LOGICAL :: scell_mat_plrn
+  !! if .true. activate supercell transformation for polaron
+  LOGICAL :: adapt_ethrdg_plrn
+  !! if .true. activate adaptive threshold in polaron davidson diagonalization
   ! ----------------------------------------------------------------------------------
   !
   !-----------------------------------------------------------------------
diff --git a/EPW/src/grid.f90 b/EPW/src/grid.f90
index f71426b24..e87f61297 100644
--- a/EPW/src/grid.f90
+++ b/EPW/src/grid.f90
@@ -29,14 +29,15 @@
     USE mp_world,  ONLY : mpime
     USE kinds,     ONLY : DP
     USE epwcom,    ONLY : filkf, nkf1, nkf2, nkf3, iterative_bte, &
-                          rand_k, rand_nk, mp_mesh_k, system_2d, eig_read, vme
+                          rand_k, rand_nk, mp_mesh_k, system_2d, eig_read, vme, &
+                          scell_mat_plrn, scell_mat, as, bs
     USE elph2,     ONLY : nkqtotf, nkqf, xkf, wkf, nkf, xkfd, deltaq, &
                           xkf_irr, wkf_irr, bztoibz, s_bztoibz
     USE cell_base, ONLY : at, bg
     USE symm_base, ONLY : s, t_rev, time_reversal, nsym
-    USE io_var,    ONLY : iunkf
-    USE low_lvl,   ONLY : init_random_seed
-    USE constants_epw, ONLY : eps4
+    USE io_var,    ONLY : iunkf, iunRpscell, iunkgridscell
+    USE low_lvl,   ONLY : init_random_seed, matinv3
+    USE constants_epw, ONLY : eps4, eps8
     USE noncollin_module, ONLY : noncolin
 # if defined(__MPI)
     USE parallel_include, ONLY : MPI_INTEGER2
@@ -68,10 +69,32 @@
     !! rest from the division of nr of q-points over pools
     INTEGER :: ierr
     !! Error status
+    INTEGER :: iRp1, iRp2, iRp3, Rpmax, nRp 
+    !! Number of unit cells within supercell
+    INTEGER :: Rp_crys_p(3)
+    !! Unit cell vectors in primitive crystal coordinates
+    INTEGER, ALLOCATABLE :: Rp(:, :)
+    !! List of unit cell vectors within supercell in primitive crystal coords
+    INTEGER :: iGs1, iGs2, iGs3, Gsmax, nGs
+    !! Number of supercell G-vectors within primitive reciprocal unit cell
+    INTEGER :: Gs_crys_s(3)
+    !! Supercell G-vectors in supercell reciprocal coordinates
+    REAL(KIND = DP) :: ap(3, 3), bp(3, 3)
+    !! Auxiliary definitions of real and reciprocal primitive cell vector matrix
     REAL(KIND = DP), ALLOCATABLE :: xkf_(:, :)
     !! coordinates k-points
     REAL(KIND = DP), ALLOCATABLE :: wkf_(:)
     !! weights k-points
+    REAL(KIND = DP) :: scell_mat_b(3, 3)
+    !! Reciprocal lattice transformation matrix
+    REAL(KIND = DP) :: p2s(3, 3), bs2p(3, 3)
+    !! Transformation matrix from primitive to supercell crystal coordinates
+    REAL(KIND = DP) :: Rp_crys_s(3)
+    !! Unit cell vectors in supercell crystal coordinates
+    REAL(KIND = DP) :: Gs_crys_p(3)
+    !! Supercell G-vectors in primitive crystal coordinates
+    REAL(KIND = DP), ALLOCATABLE :: Gs(:, :)
+    !! Supercell G-vectors within primitive reciprocal unit cell
     !
     IF (mpime == ionode_id) THEN
       IF (filkf /= '') THEN ! load from file
@@ -114,6 +137,144 @@
           CALL cryst_to_cart(nkqtotf, xkf_, at, -1)
         ENDIF
         !
+      !JLB
+      ELSEIF (scell_mat_plrn) THEN
+        !
+        WRITE(stdout, '(a)') ' '
+        WRITE(stdout, '(a)') '     Supercell transformation activated (k), as=S*at'
+        WRITE(stdout, '(a,3i4)') '     S(1, 1:3): ', scell_mat(1, 1:3)
+        WRITE(stdout, '(a,3i4)') '     S(2, 1:3): ', scell_mat(2, 1:3)
+        WRITE(stdout, '(a,3i4)') '     S(3, 1:3): ', scell_mat(3, 1:3)
+        !
+        ap = TRANSPOSE(at)
+        as = MATMUL(scell_mat,ap)
+        !
+        WRITE(stdout, '(a)') '     Transformed lattice vectors (alat units):'
+        WRITE(stdout, '(a,3f12.6)') '     as(1, 1:3): ', as(1, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     as(2, 1:3): ', as(2, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     as(3, 1:3): ', as(3, 1:3)
+        !
+        scell_mat_b = matinv3(REAL(scell_mat, DP))
+        scell_mat_b = TRANSPOSE(scell_mat_b)
+        !
+        WRITE(stdout, '(a)') '     Reciprocal lattice transformation matrix, Sbar = (S^{-1})^{t}:'
+        WRITE(stdout, '(a,3f12.6)') '     Sbar(1, 1:3): ', scell_mat_b(1, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     Sbar(2, 1:3): ', scell_mat_b(2, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     Sbar(3, 1:3): ', scell_mat_b(3, 1:3)
+        !
+        bp = TRANSPOSE(bg)
+        bs = MATMUL(scell_mat_b, bp)
+        !
+        WRITE(stdout, '(a)') '     Transformed reciprocal lattice vectors (2pi/alat units):'
+        WRITE(stdout, '(a,3f12.6)') '     bs(1, 1:3): ', bs(1, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     bs(2, 1:3): ', bs(2, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     bs(3, 1:3): ', bs(3, 1:3)
+        WRITE(stdout, '(a)') '  '
+        !
+        ! Define transformation matrix from primitive crystal coordinates
+        ! to supercell crystal coordinates Rp_crys_s = ((a_s)^{T})^{-1} (a_p)^{T} Rp_crys_p
+        p2s = matinv3(TRANSPOSE(as))
+        p2s = MATMUL(p2s,TRANSPOSE(ap))
+        !
+        ! Find how many unit cells are contained within the supercell
+        Rpmax = 5*MAXVAL(scell_mat) ! This should be large enough to find all
+        ALLOCATE(Rp(3, Rpmax**3), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadkmesh_para', 'Error allocating Rp', 1)
+        Rp  = 0
+        nRp = 0
+        DO iRp1 = -Rpmax, Rpmax
+          DO iRp2 = -Rpmax, Rpmax
+            DO iRp3 = -Rpmax, Rpmax
+              Rp_crys_p = (/iRp1, iRp2, iRp3/)
+              Rp_crys_s = MATMUL(p2s, Rp_crys_p)
+              ! Unit cell within supercell if Rp\in(0,1) in supercell crystal coordinates
+              IF (ALL(Rp_crys_s > -eps8) .AND. ALL(Rp_crys_s < 1.d0-eps8)) THEN
+                nRp = nRp + 1
+                Rp(1:3, nRp) = Rp_crys_p
+              END IF
+            END DO
+          END DO
+        END DO
+        WRITE(stdout, '(a, 3i6)') '     Number of unit cells within supercell:', nRp
+        !
+        ! Write Rp-s in supercell to file
+        IF (mpime == ionode_id) THEN
+          OPEN(UNIT = iunRpscell, FILE = 'Rp.scell.plrn', ACTION = 'write')
+          WRITE(iunRpscell, *) nRp 
+          DO iRp1 = 1, nRp
+            WRITE(iunRpscell, *) Rp(1:3, iRp1)
+          END DO
+          CLOSE(iunRpscell)
+        ENDIF
+        !
+        IF (ALLOCATED(Rp)) DEALLOCATE(Rp)
+        !
+        ! Define transformation matrix from reciprocal supercell crystal coordinates
+        ! to reciprocal primitive crystal coordinates
+        ! Gs_crys_p = ((bp)^{T})^{-1} (bs)^{T} Gs_crys_s
+        bs2p = matinv3(TRANSPOSE(bp))
+        bs2p = MATMUL(bs2p, TRANSPOSE(bs))
+        !
+        ! Find how many k-points lie within primitive reciprocal cell
+        Gsmax = Rpmax ! This should be large enough to find all
+        ALLOCATE(Gs(3, Gsmax**3), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadqmesh_serial', 'Error allocating Gs', 1)
+        Gs = 0.d0
+        nGs = 0
+        DO iGs1 = -Gsmax, Gsmax
+          DO iGs2 = -Gsmax, Gsmax
+            DO iGs3 = -Gsmax, Gsmax
+              Gs_crys_s = (/iGs1, iGs2, iGs3/)
+              Gs_crys_p = MATMUL(bs2p, Gs_crys_s)
+              ! Gs within primitive reciprocal unit cell if Gs\in(0,1) in crys_p coords.
+              IF (ALL(Gs_crys_p > -eps8) .AND. ALL(Gs_crys_p < 1.d0-eps8)) THEN
+                nGs = nGs + 1
+                Gs(1:3, nGs) = Gs_crys_p
+              END IF
+            END DO  
+          END DO
+        END DO
+        WRITE(stdout, '(a, 3i6)') '     Number of k-points needed:', nGs
+        !DO iGs1 = 1, nGs
+        !  WRITE(stdout, '(3f12.6)') Gs(1:3, iGs1)
+        !END DO
+        !
+        ! Write Gs-s within unit cell BZ (k-grid) to file
+        IF (mpime == ionode_id) THEN
+          OPEN(UNIT = iunkgridscell, FILE = 'kgrid.scell.plrn', ACTION = 'write')
+          WRITE(iunkgridscell, *) nGs
+          DO iGs1 = 1, nGs
+            WRITE(iunkgridscell, '(3f12.6)') Gs(1:3, iGs1)
+          END DO
+          CLOSE(iunkgridscell)
+        ENDIF
+        !
+        ! Save list of needed k-points
+        nkqtotf = nGs
+        ALLOCATE(xkf_(3, 2 * nkqtotf), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadkmesh_para', 'Error allocating xkf_', 1)
+        ALLOCATE(wkf_(2 * nkqtotf), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadkmesh_para', 'Error allocating wkf_', 1)
+        !
+        DO ik = 1, nkqtotf
+          !
+          ikk = 2 * ik - 1
+          ikq = ikk + 1
+          !
+          xkf_(:, ikk) = Gs(1:3, ik)
+          wkf_(ikk) = 1.d0 ! weight not important for polaron
+          !
+          xkf_(:, ikq) = xkf_(:, ikk)
+          wkf_(ikq) = 0.d0
+          !
+        ENDDO
+        !
+        ! redefine nkqtotf to include the k+q points
+        nkqtotf = 2 * nkqtotf
+        !
+        IF (ALLOCATED(Gs)) DEALLOCATE(Gs)
+        !
+      !JLB
       ELSEIF ((nkf1 /= 0) .AND. (nkf2 /= 0) .AND. (nkf3 /= 0)) THEN ! generate grid
         IF (mp_mesh_k) THEN
           ! get size of the mp_mesh in the irr wedge
@@ -1716,16 +1877,18 @@
     USE mp_global, ONLY : inter_pool_comm
     USE mp,        ONLY : mp_bcast
     USE mp_world,  ONLY : mpime
+    USE kinds,     ONLY : DP
     USE io_global, ONLY : stdout
     USE epwcom,    ONLY : filqf, nqf1, nqf2, nqf3, &
                           rand_q, rand_nq, mp_mesh_q, system_2d, lscreen, &
-                          plselfen, specfun_pl
+                          plselfen, specfun_pl, &
+                          scell_mat_plrn, scell_mat, as, bs
     USE elph2,     ONLY : xqf, wqf, nqtotf, nqf
     USE cell_base, ONLY : at, bg
     USE symm_base, ONLY : s, t_rev, time_reversal, nsym
     USE io_var,    ONLY : iunqf
-    USE low_lvl,   ONLY : init_random_seed
-    USE constants_epw, ONLY : eps4
+    USE low_lvl,   ONLY : init_random_seed, matinv3
+    USE constants_epw, ONLY : eps4, eps8
     !
     IMPLICIT NONE
     !
@@ -1741,6 +1904,28 @@
     !! Status integer
     INTEGER :: ierr
     !! Error status
+    INTEGER :: iRp1, iRp2, iRp3, Rpmax, nRp 
+    !! Number of unit cells within supercell
+    INTEGER :: Rp_crys_p(3)
+    !! Unit cell vectors in primitive crystal coordinates
+    INTEGER, ALLOCATABLE :: Rp(:, :)
+    !! List of unit cell vectors within supercell in primitive crystal coords
+    INTEGER :: iGs1, iGs2, iGs3, Gsmax, nGs
+    !! Number of supercell G-vectors within primitive reciprocal unit cell
+    INTEGER :: Gs_crys_s(3)
+    !! Supercell G-vectors in supercell reciprocal coordinates
+    REAL(KIND = DP) :: ap(3, 3), bp(3, 3)
+    !! Auxiliary definitions of real and reciprocal primitive cell vector matrix
+    REAL(KIND = DP) :: scell_mat_b(3, 3)
+    !! Reciprocal lattice transformation matrix
+    REAL(KIND = DP) :: p2s(3, 3), bs2p(3, 3)
+    !! Transformation matrix from primitive to supercell crystal coordinates
+    REAL(KIND = DP) :: Rp_crys_s(3)
+    !! Unit cell vectors in supercell crystal coordinates
+    REAL(KIND = DP) :: Gs_crys_p(3)
+    !! Supercell G-vectors in primitive crystal coordinates
+    REAL(KIND = DP), ALLOCATABLE :: Gs(:, :)
+    !! Supercell G-vectors within primitive reciprocal unit cell
     !
     IF (mpime == ionode_id) THEN
       IF (filqf /= '') THEN ! load from file
@@ -1771,6 +1956,112 @@
           CALL cryst_to_cart(nqtotf, xqf, at, -1)
         ENDIF
         !
+      !JLB
+      ELSEIF (scell_mat_plrn) THEN
+        !
+        WRITE(stdout, '(a)') ' '
+        WRITE(stdout, '(a)') '     Supercell transformation activated (q), as=S*at'
+        WRITE(stdout, '(a,3i4)') '     S(1, 1:3): ', scell_mat(1, 1:3)
+        WRITE(stdout, '(a,3i4)') '     S(2, 1:3): ', scell_mat(2, 1:3)
+        WRITE(stdout, '(a,3i4)') '     S(3, 1:3): ', scell_mat(3, 1:3)
+        !
+        ap = TRANSPOSE(at)
+        as = MATMUL(scell_mat,ap)
+        !
+        WRITE(stdout, '(a)') '     Transformed lattice vectors (alat units):'
+        WRITE(stdout, '(a,3f12.6)') '     as(1, 1:3): ', as(1, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     as(2, 1:3): ', as(2, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     as(3, 1:3): ', as(3, 1:3)
+        !
+        scell_mat_b = matinv3(REAL(scell_mat, DP))
+        scell_mat_b = TRANSPOSE(scell_mat_b)
+        !
+        WRITE(stdout, '(a)') '     Reciprocal lattice transformation matrix, Sbar = (S^{-1})^{t}:'
+        WRITE(stdout, '(a,3f12.6)') '     Sbar(1, 1:3): ', scell_mat_b(1, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     Sbar(2, 1:3): ', scell_mat_b(2, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     Sbar(3, 1:3): ', scell_mat_b(3, 1:3)
+        !
+        bp = TRANSPOSE(bg)
+        bs = MATMUL(scell_mat_b, bp)
+        !
+        WRITE(stdout, '(a)') '     Transformed reciprocal lattice vectors (2pi/alat units):'
+        WRITE(stdout, '(a,3f12.6)') '     bs(1, 1:3): ', bs(1, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     bs(2, 1:3): ', bs(2, 1:3)
+        WRITE(stdout, '(a,3f12.6)') '     bs(3, 1:3): ', bs(3, 1:3)
+        WRITE(stdout, '(a)') '  '
+        !
+        ! Define transformation matrix from primitive crystal coordinates
+        ! to supercell crystal coordinates Rp_crys_s = ((a_s)^{T})^{-1} (a_p)^{T} Rp_crys_p
+        p2s = matinv3(TRANSPOSE(as))
+        p2s = MATMUL(p2s,TRANSPOSE(ap))
+        !
+        ! Find how many unit cells are contained within the supercell
+        Rpmax = 5*MAXVAL(scell_mat) ! This should be large enough to find all
+        ALLOCATE(Rp(3, Rpmax**3), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadqmesh_serial', 'Error allocating Rp', 1)
+        Rp  = 0
+        nRp = 0
+        DO iRp1 = -Rpmax, Rpmax
+          DO iRp2 = -Rpmax, Rpmax
+            DO iRp3 = -Rpmax, Rpmax
+              Rp_crys_p = (/iRp1, iRp2, iRp3/)
+              Rp_crys_s = MATMUL(p2s, Rp_crys_p)
+              ! Unit cell within supercell if Rp\in(0,1) in supercell crystal coordinates
+              IF (ALL(Rp_crys_s > -eps8) .AND. ALL(Rp_crys_s < 1.d0-eps8)) THEN
+                nRp = nRp + 1
+                Rp(1:3, nRp) = Rp_crys_p
+              END IF
+            END DO
+          END DO
+        END DO
+        WRITE(stdout, '(a, 3i6)') '     Number of unit cells within supercell:', nRp
+        !
+        IF (ALLOCATED(Rp)) DEALLOCATE(Rp)
+        !
+        ! Define transformation matrix from reciprocal supercell crystal coordinates
+        ! to reciprocal primitive crystal coordinates
+        ! Gs_crys_p = ((bp)^{T})^{-1} (bs)^{T} Gs_crys_s
+        bs2p = matinv3(TRANSPOSE(bp))
+        bs2p = MATMUL(bs2p, TRANSPOSE(bs))
+        !
+        ! Find how many q-points lie within primitive reciprocal cell
+        Gsmax = Rpmax ! This should be large enough to find all
+        ALLOCATE(Gs(3, Gsmax**3), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadqmesh_serial', 'Error allocating Gs', 1)
+        Gs = 0.d0
+        nGs = 0
+        DO iGs1 = -Gsmax, Gsmax
+          DO iGs2 = -Gsmax, Gsmax
+            DO iGs3 = -Gsmax, Gsmax
+              Gs_crys_s = (/iGs1, iGs2, iGs3/)
+              Gs_crys_p = MATMUL(bs2p, Gs_crys_s)
+              ! Gs within primitive reciprocal unit cell if Gs\in(0,1) in crys_p coords.
+              IF (ALL(Gs_crys_p > -eps8) .AND. ALL(Gs_crys_p < 1.d0-eps8)) THEN
+                nGs = nGs + 1
+                Gs(1:3, nGs) = Gs_crys_p
+              END IF
+            END DO  
+          END DO
+        END DO
+        WRITE(stdout, '(a, 3i6)') '     Number of q-points needed:', nGs
+        !
+        ! Save list of needed q-points
+        nqtotf = nGs
+        ALLOCATE(xqf(3, nqtotf), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadqmesh_serial', 'Error allocating xqf', 1)
+        ALLOCATE(wqf(nqtotf), STAT = ierr)
+        IF (ierr /= 0) CALL errore('loadqmesh_serial', 'Error allocating wqf', 1)
+        !
+        DO iq = 1, nqtotf
+          !
+          xqf(:, iq) = Gs(1:3, iq)
+          wqf(iq) = 1.d0 ! weight not important for polaron
+          !
+        ENDDO
+        !
+        IF (ALLOCATED(Gs)) DEALLOCATE(Gs)
+        !
+      !JLB
       ELSEIF ((nqf1 /= 0) .AND. (nqf2 /= 0) .AND. (nqf3 /= 0)) THEN ! generate grid
         IF (mp_mesh_q) THEN
           IF (lscreen) CALL errore ('loadqmesh', 'If lscreen=.TRUE. do not use mp_mesh_q',1)
diff --git a/EPW/src/io_var.f90 b/EPW/src/io_var.f90
index dbc8cfe5a..e21b6d687 100644
--- a/EPW/src/io_var.f90
+++ b/EPW/src/io_var.f90
@@ -44,6 +44,7 @@
             iunsparset_merge, iunepmatcb_merge, iunsparseqcb_merge,                &
             iunsparseicb_merge, iunsparsejcb_merge, iunsparsetcb_merge,            &
             iunsparsekcb_merge, iunepmat_merge
+  PUBLIC :: iunRpscell, iunkgridscell, iunpsirscell
 
   !
   ! Output of physically relevant quantities (60-100)
@@ -176,6 +177,12 @@
   ! Miscellaneous (326-350)
   INTEGER :: epwbib          = 326 ! EPW bibliographic file.
   !
+  ! Output quantities related to polaron (350-400)
+  !JLB: All the other polaron I/O units should also be defined here for consistency
+  INTEGER :: iunRpscell      = 351 ! Rp unit cell list within polaron supercell
+  INTEGER :: iunkgridscell   = 352 ! Gs k-grid used for transformed supercell
+  INTEGER :: iunpsirscell    = 353 ! Polaron wf in real space for transformed supercell
+  !
   ! Merging of files (400-450)
   INTEGER :: iunepmat_merge    = 400
   INTEGER :: iunsparseq_merge  = 401
diff --git a/EPW/src/low_lvl.f90 b/EPW/src/low_lvl.f90
index 34d992167..0e9e3b312 100644
--- a/EPW/src/low_lvl.f90
+++ b/EPW/src/low_lvl.f90
@@ -864,44 +864,55 @@
     !
     !-----------------------------------------------------------------------
     FUNCTION matinv3(A) RESULT(B)
-    !-----------------------------------------------------------------------
-    !!
-    !! Performs a direct calculation of the inverse of a 3×3 matrix.
-    !!
-    USE kinds,         ONLY : DP
-    USE constants_epw, ONLY : eps160
-    !
-    REAL(KIND = DP), INTENT(in) :: A(3, 3)
-    !! Matrix
-    !
-    ! Local variable
-    REAL(KIND = DP) :: detinv
-    !! Inverse of the determinant
-    REAL(KIND = DP) :: B(3, 3)
-    !! Inverse matrix
-    !
-    ! Calculate the inverse determinant of the matrix
-    detinv = 1 / (A(1, 1) * A(2, 2) * A(3, 3) - A(1, 1) * A(2, 3) * A(3, 2) &
-                - A(1, 2) * A(2, 1) * A(3, 3) + A(1, 2) * A(2, 3) * A(3, 1) &
-                + A(1, 3) * A(2, 1) * A(3, 2) - A(1, 3) * A(2, 2) * A(3, 1))
-    !
-    IF (detinv < eps160) THEN
-      CALL errore('matinv3', 'Inverse does not exist ', 1)
-    ENDIF
-    !
-    ! Calculate the inverse of the matrix
-    B(1, 1) = +detinv * (A(2, 2) * A(3, 3) - A(2, 3) * A(3, 2))
-    B(2, 1) = -detinv * (A(2, 1) * A(3, 3) - A(2, 3) * A(3, 1))
-    B(3, 1) = +detinv * (A(2, 1) * A(3, 2) - A(2, 2) * A(3, 1))
-    B(1, 2) = -detinv * (A(1, 2) * A(3, 3) - A(1, 3) * A(3, 2))
-    B(2, 2) = +detinv * (A(1, 1) * A(3, 3) - A(1, 3) * A(3, 1))
-    B(3, 2) = -detinv * (A(1, 1) * A(3, 2) - A(1, 2) * A(3, 1))
-    B(1, 3) = +detinv * (A(1, 2) * A(2, 3) - A(1, 3) * A(2, 2))
-    B(2, 3) = -detinv * (A(1, 1) * A(2, 3) - A(1, 3) * A(2, 1))
-    B(3, 3) = +detinv * (A(1, 1) * A(2, 2) - A(1, 2) * A(2, 1))
-    !-----------------------------------------------------------------------
-    END FUNCTION matinv3
-    !-----------------------------------------------------------------------
+      !-----------------------------------------------------------------------
+      !!
+      !! Performs a direct calculation of the inverse of a 3×3 matrix.
+      !!
+      USE kinds,         ONLY : DP
+      USE constants_epw, ONLY : eps160
+      !
+      REAL(KIND = DP), INTENT(in) :: A(3, 3)
+      !! Matrix
+      !
+      ! Local variable
+      REAL(KIND = DP) :: detinv, det
+      !! Inverse of the determinant
+      REAL(KIND = DP) :: B(3, 3)
+      !! Inverse matrix
+      !
+      !! Calculate the inverse determinant of the matrix
+      !detinv = 1 / (A(1, 1) * A(2, 2) * A(3, 3) - A(1, 1) * A(2, 3) * A(3, 2) &
+      !            - A(1, 2) * A(2, 1) * A(3, 3) + A(1, 2) * A(2, 3) * A(3, 1) &
+      !            + A(1, 3) * A(2, 1) * A(3, 2) - A(1, 3) * A(2, 2) * A(3, 1))
+      !!
+      !IF (detinv < eps160) THEN
+      !  CALL errore('matinv3', 'Inverse does not exist ', 1)
+      !ENDIF
+      !JLB
+      det = (A(1, 1) * A(2, 2) * A(3, 3) - A(1, 1) * A(2, 3) * A(3, 2)  &
+            - A(1, 2) * A(2, 1) * A(3, 3) + A(1, 2) * A(2, 3) * A(3, 1) &
+            + A(1, 3) * A(2, 1) * A(3, 2) - A(1, 3) * A(2, 2) * A(3, 1))
+      !
+      IF (ABS(det) < eps160) THEN
+        CALL errore('matinv3', 'Inverse does not exist ', 1)
+      END IF
+      !
+      detinv = 1 / det
+      !JLB
+      !
+      ! Calculate the inverse of the matrix
+      B(1, 1) = +detinv * (A(2, 2) * A(3, 3) - A(2, 3) * A(3, 2))
+      B(2, 1) = -detinv * (A(2, 1) * A(3, 3) - A(2, 3) * A(3, 1))
+      B(3, 1) = +detinv * (A(2, 1) * A(3, 2) - A(2, 2) * A(3, 1))
+      B(1, 2) = -detinv * (A(1, 2) * A(3, 3) - A(1, 3) * A(3, 2))
+      B(2, 2) = +detinv * (A(1, 1) * A(3, 3) - A(1, 3) * A(3, 1))
+      B(3, 2) = -detinv * (A(1, 1) * A(3, 2) - A(1, 2) * A(3, 1))
+      B(1, 3) = +detinv * (A(1, 2) * A(2, 3) - A(1, 3) * A(2, 2))
+      B(2, 3) = -detinv * (A(1, 1) * A(2, 3) - A(1, 3) * A(2, 1))
+      B(3, 3) = +detinv * (A(1, 1) * A(2, 2) - A(1, 2) * A(2, 1))
+      !-----------------------------------------------------------------------
+      END FUNCTION matinv3
+      !-----------------------------------------------------------------------
     !-----------------------------------------------------------------------
     PURE FUNCTION find_minimum(grid, grid_dim) RESULT(minpos)
     !-----------------------------------------------------------------------
diff --git a/EPW/src/polaron.f90 b/EPW/src/polaron.f90
new file mode 100644
index 000000000..d40889fda
--- /dev/null
+++ b/EPW/src/polaron.f90
@@ -0,0 +1,3792 @@
+!
+! Copyright (C) 2010-2016 Samuel Ponce', Roxana Margine, Carla Verdi, Feliciano Giustino
+! Copyright (C) 2007-2009 Jesse Noffsinger, Brad Malone, Feliciano Giustino
+!
+! This file is distributed under the terms of the GNU General Public
+! License. See the file `LICENSE' in the root directory of the
+! present distribution, or http://www.gnu.org/copyleft.gpl.txt .
+!
+!-----------------------------------------------------------------------
+MODULE polaron
+   !!
+   !! This module contains variables and subroutines of polaron
+   !! Authored by Chao Lian, Weng Hong (Denny) Sio, and Jon Lafuente-Bartolome
+   !!
+   USE kinds,  ONLY : dp
+   USE buffers,       ONLY : open_buffer, get_buffer, save_buffer, close_buffer
+
+   IMPLICIT NONE
+   !! pool-dependent Hamiltonian
+   COMPLEX(DP), ALLOCATABLE :: Hamil(:, :)
+   !! polaron eigenvector
+   COMPLEX(DP), ALLOCATABLE :: eigVec(:, :), hEigVec(:, :)
+   !! Gathered eigenvalues over the pools
+   COMPLEX(DP), ALLOCATABLE :: gq_model(:)
+   !! el-ph matrix element in vertex model
+   COMPLEX(DP), ALLOCATABLE :: M_mat(:, :, :, :)
+   !! el-ph matrix element in vertex model
+   COMPLEX(DP), ALLOCATABLE :: epf(:, :, :, :), epfall(:, :, :, :, :)
+   !! el-ph matrix element in vertex model
+   REAL(DP),    ALLOCATABLE :: etf_model(:), wq_model
+   !! band structure and phonon freq in vertex model
+   REAL(DP),    ALLOCATABLE :: etf_all(:, :)
+   !! Gathered k points coordinates over the pools
+   REAL(DP),    ALLOCATABLE :: xkf_all(:,:) !, xkf_save(:,:)
+   !! Generate the maps of k->k+q and k->G-k
+   !INTEGER,     ALLOCATABLE :: Rp_array(:, :, :)
+   !! Generate the maps of k->k+q and k->G-k
+   INTEGER,     ALLOCATABLE :: kpg_map(:) ! remove ikq_all(:, :), too large (nkf, nktotf), calculate when in use
+   !! Number of bands in subgroup used in polaron calculations
+   LOGICAL,     ALLOCATABLE :: is_mirror_k(:), is_mirror_q(:), is_tri_q(:), is_tri_k(:) !JLB
+   !! is this local k and global q point is a mirror point, used for time-reversal symmertry
+   COMPLEX(dp), ALLOCATABLE :: rho_mat(:, :, :, :)
+   !!
+   INTEGER                  :: nbnd_plrn, nbnd_g_plrn!, iq_save, ik_save
+   !!
+   INTEGER                  :: lword_h, lword_g, lword_m, lword_umn, lword_ekanu!, iq_save, ik_save
+   !!
+   INTEGER                  :: io_level_g_plrn, io_level_h_plrn, io_level_umn_plrn, io_level_ekanu_plrn!, iq_save, ik_save
+   !!
+   INTEGER                  :: hblocksize
+   !!
+   INTEGER                  :: band_pos
+   !!
+   INTEGER                  :: ibvec(-3:3)
+   !!
+   INTEGER                  :: ik_edge
+   !!
+   INTEGER,     ALLOCATABLE :: select_bands_plrn(:)
+   !!
+   INTEGER,       PARAMETER :: iepfall = 12315, ihamil = 12312, iMmn = 12344, irho = 12555, iUmn = 12325, iekanu = 12326
+   !! The file unit to read el-ph matrix element
+   LOGICAL                  :: test_tags_plrn(20) = .false., mem_save_h = .false.
+   !! The B matrix Bqu
+   COMPLEX(DP),  ALLOCATABLE :: Bmat(:,:)
+   !!
+   COMPLEX(DP) :: berry_phase(1:3)
+   !!
+   ! JLB: for non-diagonal supercells
+   INTEGER :: nRp
+   !! Number of unit cells on supercell
+   INTEGER, ALLOCATABLE :: Rp(:,:)
+   !! List of unit cell vectors within supercell
+   !
+   PUBLIC  :: plrn_prepare, plrn_flow_select
+CONTAINS
+   !
+   !-----------------------------------------------------------------------
+   SUBROUTINE plrn_prepare(totq, iq_restart)
+      USE epwcom,        ONLY : start_band_plrn, end_band_plrn, nbndsub, nstate_plrn, debug_plrn
+      USE epwcom,        ONLY : cal_psir_plrn, restart_plrn,  interp_Ank_plrn, interp_Bqu_plrn
+      USE epwcom,        ONLY : model_vertex_plrn, full_diagon_plrn, nhblock_plrn, Mmn_plrn, lifc
+      USE epwcom,        ONLY : g_start_band_plrn, g_end_band_plrn
+      USE epwcom,        ONLY : g_start_energy_plrn, g_end_energy_plrn
+      USE epwcom,        ONLY : model_enband_plrn, model_phfreq_plrn, model_vertex_plrn
+      USE epwcom,        ONLY : g_power_order_plrn, io_lvl_plrn
+      USE epwcom,        ONLY : m_eff_plrn, kappa_plrn, omega_LO_plrn, fsthick, etf_mem
+      USE epwcom,        ONLY : lrot, lphase
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE elph2,         ONLY : nkqf, nkf, nqf, nqtotf, nktotf, etf
+      USE modes,         ONLY : nmodes
+      USE mp_world,      ONLY : mpime
+      USE cell_base,     ONLY : bg, at, omega, alat
+      USE constants_epw, ONLY : czero, zero, ryd2ev
+      USE elph2,         ONLY : xkf, xqf, wf, xkq, chw
+      USE constants_epw, ONLY : czero, cone, pi, ci, twopi, fpi, eps6, eps8, eps5
+      USE epwcom,        ONLY : nkf1, nkf2, nkf3, seed_plrn
+      USE epwcom,        ONLY : nqf1, nqf2, nqf3, type_plrn
+      USE epwcom,        ONLY : scell_mat_plrn
+      USE poolgathering, ONLY : poolgather2
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE io_files,      ONLY : check_tempdir
+
+      IMPLICIT NONE
+      INTEGER, INTENT(OUT)  :: iq_restart
+      INTEGER, INTENT(IN)   :: totq
+      LOGICAL               :: debug, plrn_scf, exst, pfs
+      INTEGER :: inu, iq, ik, ikk, jk, iibnd, jjbnd, ibnd, jbnd, ikq, ik_global, iplrn, ierr
+      INTEGER :: iter, icount, ix, iy, iz, start_mode, idos, iatm, indexkn1, indexkn2
+      INTEGER :: ikGamma, iqGamma, io_level, minNBlock, ishift
+      !INTEGER(kind=8), parameter :: maxword = 2**31 - 1
+      INTEGER, PARAMETER :: maxword = HUGE(1)
+      INTEGER :: lword_h_tmp, lword_g_tmp, lword_umn_tmp, lword_ekanu_tmp
+      REAL(DP), ALLOCATABLE :: rtmp2(:,:)
+      REAL(DP) :: xxk(3), xkf_cart(3, nkqf), xqf_cart(3, nqf), efermi, xkf_len(nkqf), klen, shift(3), rfac
+      CHARACTER(LEN=100) :: fmt_mode
+
+      CALL start_clock('plrn_prepare')
+      IF(etf_mem == 3) THEN
+         CALL errore('polaron_prepare', 'Polaron module not working with etf_mem = 3', 1)
+      END IF
+
+      WRITE(stdout, '(5x,"fsthick not working in polaron module, selecting all the k/q points.")')
+      !! type_plrn denotes whether electron polaron (-1) or hole polaron (+1)
+      !! Legalize the type_plrn input, in case that the user use an arbitrary number
+      IF(type_plrn < 0) THEN
+         type_plrn = -1
+         WRITE(stdout, '(5x,"The electron polaron is calculated.")')
+      ELSE
+         type_plrn = 1
+         WRITE(stdout, '(5x,"The hole polaron is calculated.")')
+      END IF
+
+      lrot = .true.
+      lphase = .true.
+
+      debug = debug_plrn
+      IF (debug_plrn) CALL check_tempdir('test_out', exst, pfs)
+
+      WRITE(stdout,'(5x,a)') REPEAT('=',67)
+
+
+
+      IF(g_start_band_plrn == 0) g_start_band_plrn = 1
+      IF(g_end_band_plrn == 0) g_end_band_plrn = nbndsub
+      nbnd_g_plrn = g_end_band_plrn - g_start_band_plrn + 1
+
+      IF(start_band_plrn == 0) start_band_plrn = g_start_band_plrn
+      IF(end_band_plrn == 0) end_band_plrn = g_end_band_plrn
+      nbnd_plrn = end_band_plrn - start_band_plrn + 1
+
+      IF(g_start_band_plrn > start_band_plrn .or. g_end_band_plrn < end_band_plrn) THEN
+         CALL errore('polaron_prepare', 'Selecting more bands in polaron than saving g matrix', 1)
+      END IF
+
+      ALLOCATE(select_bands_plrn(nbnd_plrn), STAT = ierr)
+      IF (ierr /= 0) CALL errore('polaron_prepare', 'Error allocating select_bands_plrn', 1)
+
+      select_bands_plrn = 0
+      DO ibnd = 1, nbnd_plrn
+         select_bands_plrn(ibnd) = start_band_plrn + ibnd - 1
+      END DO
+
+      !! copy q(x,y,z) to xkf_all, save the copy of all kpoints
+      !! Note that poolgather2 has the dimension of nktotf*2,
+      !! which has k at ik and k+q at ik+1
+      !! This is because that xkf has the dimension of 3, nkf*2
+      !! where the ik is k and ik+1 is k+q
+      ALLOCATE(xkf_all(3, nktotf), STAT = ierr)
+      IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating xkf_all', 1)
+      ALLOCATE(rtmp2(3, nktotf*2), STAT = ierr)
+      IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating rtmp2', 1)
+      ALLOCATE(epf(nbnd_g_plrn, nbnd_g_plrn, nmodes, nkf), STAT = ierr)
+      IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating epf', 1)
+      epf = czero
+      !
+      xkf_all = zero
+      rtmp2 = zero
+      CALL poolgather2 ( 3, nktotf*2, nkqf, xkf, rtmp2)
+      xkf_all(1:3, 1:nktotf) = rtmp2(1:3, 1:nktotf*2:2)
+
+      IF(debug_plrn) THEN
+         DO ik = 1, nktotf
+            WRITE(stdout, '(5x, 3f15.6)') xkf_all(1:3, ik)
+         END DO
+      END IF
+
+      DEALLOCATE(rtmp2)
+
+      WRITE(stdout, "(5x, 'Use the band from ',i0, ' to ', i0, ' total ', i0)") start_band_plrn, end_band_plrn, nbnd_plrn
+      WRITE(stdout, "(5x, 'Including bands: ', 10i3)") select_bands_plrn
+      WRITE(stdout, "(5x, 'Use the band from ',i0, ' to ', i0, ' total ', i0, ' in saving g')") &
+            g_start_band_plrn, g_end_band_plrn, nbnd_g_plrn
+
+
+      WRITE(stdout, "(5x, 'Gathering eigenvalues of ', i0, ' bands and ', i0, ' k points')") nbndsub, nktotf
+      ALLOCATE(etf_all(nbndsub, nktotf), STAT = ierr)
+      IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating etf_all', 1)
+
+      IF(model_enband_plrn) THEN
+         etf = zero
+         ! Find the distance to the nearest Gamma point in crystal coordinates
+         DO ik = 1, 2*nkf
+            klen = 1E3
+            xxk = xkf(:, ik)
+            CALL cryst_to_cart(1, xxk, bg, 1)
+            !WRITE(stdout, '(3f7.4)') xxk * twopi/alat
+            DO ishift = 1, 27
+               shift(1:3) = REAL(index_shift(ishift), KIND=DP)
+               xxk = xkf(:, ik) + shift
+               CALL cryst_to_cart(1, xxk, bg, 1)
+               klen = MIN(klen, NORM2(xxk))
+            END DO
+            etf(1, ik) = 0.5/m_eff_plrn * (klen * twopi/alat)**2
+         END DO
+      END IF
+
+      etf_all = zero
+      CALL gather_band_eigenvalues(etf, etf_all)
+
+      IF(model_vertex_plrn) THEN
+         WRITE(stdout, '(5x, a, f8.3)') "Using model g vertex, with order ", g_power_order_plrn
+         ALLOCATE(gq_model(nqf), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating gq_model', 1)
+         gq_model = zero
+
+         rfac = SQRT(fpi/omega*omega_LO_plrn/kappa_plrn)
+         DO iq = 1, nqf
+            klen = 1E3
+            DO ishift = 1, 27
+               shift(1:3) = REAL(index_shift(ishift), KIND=DP)
+               xxk = xqf(:, iq) + shift
+               CALL cryst_to_cart(1, xxk, bg, 1)
+               klen = MIN(klen, NORM2(xxk))
+            END DO
+            IF(klen > eps8) THEN
+               gq_model(iq) = rfac/((klen * twopi/alat) ** g_power_order_plrn)
+            END IF
+         END DO
+      END IF
+
+      ! change unit from eV to Rydberg
+      g_start_energy_plrn = g_start_energy_plrn / ryd2ev
+      g_end_energy_plrn = g_end_energy_plrn / ryd2ev
+
+      CALL start_clock('find_EVBM')
+      CALL find_band_extreme(type_plrn, etf_all, ik_edge, band_pos, efermi)
+
+      !! Determine the Fermi energy, read from the input or calculated from band structure
+      WRITE(stdout, '(5x, "Fermi Energy is", f16.7, &
+      &" (eV) located at kpoint ", i6, 3f8.3, " band ", i3)') efermi*ryd2ev, ik_edge, xkf_all(1:3, ik_edge), band_pos
+      ! Shift the eigenvalues to make VBM/CBM zero
+      etf_all(1:nbndsub, 1:nktotf) = etf_all(1:nbndsub, 1:nktotf) - efermi
+
+      CALL stop_clock('find_EVBM')
+
+      WRITE(stdout, "(5x, 'Allocating arrays and open files.')")
+
+      IF(interp_Ank_plrn .or. interp_Bqu_plrn .or. cal_psir_plrn) THEN
+         plrn_scf = .false.
+         restart_plrn = .true.
+      ELSE
+         plrn_scf = .true.
+      END IF
+      IF(restart_plrn) THEN
+         iq_restart = totq + 1
+      ELSE
+         iq_restart = 1
+      END IF
+      IF(interp_Ank_plrn) THEN
+         ALLOCATE(eigVec(nktotf*nbnd_plrn, nstate_plrn), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_prepare', 'Error allocating eigVec', 1)
+         eigVec = czero
+      ELSE IF(plrn_scf) THEN
+         CALL check_tempdir('plrn_tmp', exst, pfs)
+
+         io_level_g_plrn = 1
+         lword_g_tmp = nbnd_g_plrn * nbnd_g_plrn * nmodes * nkf
+         IF(lword_g_tmp > maxword) THEN
+            CALL errore('polaron_prepare', 'Record size larger than maximum, use more cores!', 1)
+         ELSE
+            lword_g = INT(lword_g_tmp)
+         END IF
+         IF (io_lvl_plrn == 0) THEN
+            ALLOCATE(epfall(nbnd_g_plrn, nbnd_g_plrn, nmodes, nkf, nqtotf), STAT = ierr)
+            IF (ierr /= 0) CALL errore('polaron_prepare', 'Error allocating epfall', 1)
+            epfall = czero
+         ELSE IF (io_lvl_plrn == 1) THEN
+            CALL open_buffer( iepfall , 'ephf' , lword_g, io_level_g_plrn, exst, direc='plrn_tmp/') !,
+         END IF
+
+         !!JLB
+         !! Open unit to write umn matrices
+         !OPEN(UNIT=iUmn, ACTION='write', FILE='plrn_tmp/umn.dat')
+         !! Open unit to write ekanu matrices
+         !OPEN(UNIT=iekanu, ACTION='write', FILE='plrn_tmp/ekanu.dat')
+         !!JLB
+
+
+         io_level_h_plrn = 1
+         IF(nhblock_plrn < 1 .or. nhblock_plrn > nkf * nbnd_plrn ) THEN
+            CALL errore('polaron_prepare','Illegal nhblock_plrn, should between 1 and nkf * nbnd_plrn', 1)
+         END IF
+         minNBlock = CEILING(REAL(nkf * nbnd_plrn * nktotf * nbnd_plrn, dp) / maxword)
+
+         IF(minNBlock >  nhblock_plrn .and. nhblock_plrn /= 1) THEN
+            CALL errore('polaron_prepare', 'Record size larger than maximum, use more cores!', 1)
+         END IF
+
+         hblocksize = CEILING(REAL(nkf * nbnd_plrn, dp) / nhblock_plrn)
+
+         lword_h_tmp = nktotf * nbnd_plrn * hblocksize
+
+         IF(nhblock_plrn /= 1) THEN
+            IF(lword_h_tmp > maxword) THEN
+               CALL errore('polaron_prepare', 'Record size larger than maximum, use more cores or larger nhblock_plrn!', 1)
+            ELSE
+               lword_h = lword_h_tmp
+               !IF(nhblock_plrn /= 1) CALL open_buffer( ihamil , 'ham' , lword_h, io_level_h_plrn, exst, direc='plrn_tmp/')
+            END IF
+         END IF
+
+         lword_m = nbnd_plrn * nbnd_plrn * nktotf * 3
+         !CALL open_buffer( iMmn , 'mmn' , lword_m, io_level_g_plrn, exst, direc='plrn_tmp/')
+         !CALL open_buffer( irho , 'rho' , lword_m, io_level_g_plrn, exst, direc='plrn_tmp/')
+
+         ALLOCATE(Hamil(nktotf*nbnd_plrn, hblocksize), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_prepare', 'Error allocating Hamil', 1)
+
+         ! Allocate and initialize the variables
+         ALLOCATE(eigVec(nktotf*nbnd_plrn, nstate_plrn), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_prepare', 'Error allocating eigVec', 1)
+         eigVec = czero
+
+         !! Check whether the input is legal, otherwise print warning and stop
+         !! Check the input now, because if inputs are illegal, we can stop the calculation
+         !! before the heavy el-ph interpolation begins.
+         IF(nktotf /= nqtotf .or. nktotf < 1) CALL errore('polaron_scf','Not identical k and q grid. Do use same nkf and nqf!', 1)
+
+         IF( (.NOT. scell_mat_plrn) .AND. (nkf1 == 0 .or. nkf2 == 0 .or. nkf3 == 0) ) THEN
+            CALL errore('polaron_scf','Try to use nkf and nqf to generate k and q grid, &
+               IF you are using a manual grid, also provide this information.', 1)
+         END IF
+         IF(nkf < 1) CALL errore('polaron_scf','Some node has no k points!', 1)
+         IF(nqtotf /= nqf) CALL errore('polaron_scf','Parallel over q is not available for polaron calculations.', 1)
+
+         WRITE(stdout, '(5x, "Polaron wavefunction calculation starts with k points ",&
+            &i0, ", q points ", i0, " and KS band ", i0)') nktotf,  nqtotf,  nbnd_plrn
+
+         !! check whether the k and q mesh are identical
+         !! This may not be theoretically necessary,
+         !! but necessary in this implementation
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            IF (ANY(ABS(xkf_all(1:3, ik_global) - xqf(1:3, ik_global)) > eps6)) THEN
+               CALL errore('polaron_scf', 'The k and q meshes must be exactly the same!', 1)
+            END IF
+         END DO
+
+         !! map iq to G-iq, ik to G-ik
+         !! find the position of Gamma point in k and q grid
+         !! Note that xqf is not a MPI-local variable, xqf = xqtotf otherwise
+         !! the program gives wrong results
+         ikGamma = indexGamma(xkf_all)
+         iqGamma = indexGamma(xqf)
+         IF (ikGamma == 0) CALL errore('polaron_scf','k=0 not included in k grid!', 1)
+         IF (iqGamma == 0) CALL errore('polaron_scf','q=0 not included in q grid!', 1)
+         WRITE(stdout, '(5x, "The index of Gamma point in k grid is ", i0, " &
+            &and q grid IS ", i0)') ikGamma, iqGamma
+
+         !! Given k, find the index of -k+G, to impose the relation
+         !! A_{n,-k+G} = A^*_{n,k} and B_{-q+G, \nu} = B^*_{q, \nu}
+         !! the relation k1 + k2 = G is unique given k1
+         !! Since both A and B have the dimension of nktotf/nqtotf,
+         !! kpg_map should map all the k from 1 to nktotf (global)
+         ALLOCATE(kpg_map(nqtotf), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating kpg_map', 1)
+         kpg_map = 0
+
+         ALLOCATE(is_mirror_k(nkf), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating is_mirror_k', 1)
+         ALLOCATE(is_mirror_q(nqf), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating is_mirror_q', 1)
+         !JLB
+         ALLOCATE(is_tri_k(nkf), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating is_mirror_q', 1)
+         is_tri_k = .false.
+         ALLOCATE(is_tri_q(nqf), STAT = ierr)
+         IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating is_mirror_q', 1)
+         is_tri_q = .false.
+
+         is_mirror_q = .false.
+         is_mirror_k = .false.
+
+
+         WRITE(stdout, '(5x, a)') "Finding the index of -k for each k point."
+         ! For two k points k1 and k2, if k1 = G - k2, G is any reciprocal vector
+         ! then k2 is the mirror point if the index of k2 is larger than k1
+         ! Same rule for q, while is_mirror_q(nqf) is global but is_mirror_k(nkf) is local
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            DO ikq = 1, nktotf
+               ! -k+G = k', i.e. k' + k = G, G may be (0, 0, 0)
+               xxk = xkf_all(1:3, ik_global) + xkf_all(1:3, ikq)
+               IF (isGVec(xxk)) kpg_map(ik_global) = ikq
+            END DO
+            IF (kpg_map(ik_global) == 0) CALL errore('polaron_scf', 'Not legal k/q grid!', 1)
+
+            ikq = kpg_map(ik_global)
+            IF (ik_global > ikq) THEN
+               is_mirror_k(ik) = .true.
+            !JLB
+            ELSE IF (ik == ikq) THEN
+               is_tri_k(ik) = .true.
+            !JLB
+            END IF
+         END DO
+         CALL mp_sum(kpg_map, inter_pool_comm)
+
+         DO iq = 1, nqf
+            ikq = kpg_map(iq)
+            IF (iq > ikq) THEN
+               is_mirror_q(iq) = .true.
+            !JLB
+            ELSE IF (iq == ikq) THEN
+               is_tri_q(iq) = .true.
+            !JLB
+            END IF
+         END DO
+
+
+         WRITE(stdout, '(5x, a)') "Checking the k + q is included in the mesh grid for each k and q."
+         !! find the global index of ik_global, ikq with vector k and k+q.
+         !! Different from kpg_map, it is used in constructing Hamiltonian or hpsi
+         !! ik goes over all the local k points, to parallel the program
+         DO ik = 1, nkf
+            DO iq = 1, nqtotf
+               ik_global = ikqLocal2Global(ik, nktotf)
+               xxk = xkf_all(1:3, iq) + xkf_all(1:3, ik_global)
+               IF (ikq_all(ik, iq) == 0) THEN
+                  CALL errore('polaron_scf','Not commensurate k and q grid!', 1)
+               END IF
+            END DO
+         END DO
+
+
+
+      END IF
+
+      WRITE(stdout, "(5x, 'End of plrn_prepare')")
+      CALL stop_clock('plrn_prepare')
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   SUBROUTINE plrn_save_g_to_file(iq, epf17, wf)
+      USE modes,         ONLY : nmodes
+      USE epwcom,        ONLY : g_start_band_plrn, g_end_band_plrn
+      USE epwcom,        ONLY : nbndsub, g_tol_plrn, io_lvl_plrn
+      USE epwcom,        ONLY : g_start_energy_plrn, g_end_energy_plrn
+      USE elph2,         ONLY : nkf, nktotf, nqtotf
+      USE constants_epw, ONLY : eps8, two, czero
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE mp_global,     ONLY : inter_pool_comm
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+
+      IMPLICIT NONE
+      INTEGER :: ik, ikq, imode, ibnd, jbnd, ik_global
+      INTEGER, SAVE :: g_count_sum
+      COMPLEX(KIND = DP), INTENT(INOUT) :: epf17(:, :, :, :)
+      REAL(KIND = DP), INTENT(IN) :: wf(:, :)
+      REAL(KIND = DP) :: eig
+      INTEGER, INTENT(IN) :: iq
+      COMPLEX(DP) :: ctemp
+      ! In polaron equations, g is not epf17 but epf17/omega
+      ! To ensure a Hermitian Hamiltonian, g_{mnu}(k, -q) is calculated as g*_{nmu}(k-q, q)
+      DO ik = 1, nkf
+         ikq = ikq_all(ik, iq)
+         DO imode = 1, nmodes
+            IF (wf(imode, iq) > eps8) THEN
+               epf(:, :, imode, ik) = &
+               epf17(g_start_band_plrn:g_end_band_plrn, g_start_band_plrn:g_end_band_plrn, imode, ik)/DSQRT( two * wf(imode, iq))
+            END IF
+         END DO
+      END DO
+
+      IF(io_lvl_plrn == 0) THEN
+         epfall(:, :, : ,: ,iq) = epf(:, :, :, :)
+      ELSE IF (io_lvl_plrn == 1) THEN
+         CALL save_buffer(epf(:, :, :, :), nbnd_g_plrn*nbnd_g_plrn*nmodes*nkf, iepfall, iq)
+      END IF
+
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   FUNCTION ikq_all(ik, iq)
+      ! find the global index of k+q for the local ik and global iq
+      USE elph2, ONLY : nktotf
+      USE epwcom, ONLY : nkf1, nkf2, nkf3
+
+      IMPLICIT NONE
+
+      INTEGER :: ikq, ik_global, ikq_all, index_target(1:3), index_kq, ikq_loop
+      INTEGER, INTENT(IN) :: ik, iq ! ik is local and iq is global
+
+      REAL(dp) :: xxk(1:3), xxk_target(1:3)
+
+      CALL start_clock('find_k+q')
+      ikq_all = 0
+
+      ik_global = ikqLocal2Global(ik, nktotf)
+      xxk = xkf_all(1:3, iq) + xkf_all(1:3, ik_global)
+
+      xxk_target(1:3) = xxk(1:3) - INT(xxk(1:3))
+      index_target(1:3) = NINT(xxk_target(1:3) * (/nkf1, nkf2, nkf3/))
+
+      index_kq = index_target(1) * nkf1 * nkf2 + index_target(2) * nkf2 + index_target(3) + 1
+
+      !print *, "is this correct ", xxk(1:3), xkf_all(1:3, index_kq), index_target(1:3), isGVec(xxk - xkf_all(1:3, index_kq))
+      DO ikq_loop = index_kq - 1, nktotf + index_kq
+         ! ik (local) + iq (global) = ikq (global)
+         ! get ikq to locate the column of the Hamiltonian
+         ikq = MOD(ikq_loop, nktotf) + 1
+         IF (isGVec(xxk - xkf_all(1:3, ikq))) THEN
+            ikq_all = ikq
+            !print *, "found ikq at ", ikq_loop - index_kq, " cycles"
+            EXIT
+         END IF
+      END DO
+      CALL stop_clock('find_k+q')
+
+      IF (ikq_all == 0) CALL errore('ikq_all','k + q not found', 1)
+
+   END FUNCTION
+   !-----------------------------------------------------------------------
+   FUNCTION find_ik(xxk, xkf_all)
+      USE elph2, ONLY : nktotf
+      USE epwcom, ONLY : nkf1, nkf2, nkf3
+
+      IMPLICIT NONE
+
+      INTEGER :: ik, find_ik
+
+      REAL(dp), INTENT(IN) :: xxk(1:3), xkf_all(1:3, 1:nktotf)
+      REAL(dp) :: xkq(1:3)
+      !xxk_target(1:3)
+
+      CALL start_clock('find_k')
+      find_ik = 0
+      DO ik = 1, nktotf
+         xkq(1:3) = xkf_all(1:3, ik) - xxk(1:3)
+         IF(isGVec(xkq)) THEN
+            find_ik = ik
+            EXIT
+         END IF
+      END DO
+
+      IF (find_ik == 0) CALL errore('find_ik','k not found', 1)
+      CALL stop_clock('find_k')
+   END FUNCTION
+   !-----------------------------------------------------------------------
+   SUBROUTINE plrn_flow_select(nrr_k, ndegen_k, irvec_r, nrr_q, ndegen_q, irvec_q, rws, nrws, dims)
+      USE epwcom,        ONLY : cal_psir_plrn, restart_plrn,  interp_Ank_plrn, interp_Bqu_plrn
+      USE epwcom,        ONLY : io_lvl_plrn, scell_mat_plrn
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+
+
+      IMPLICIT NONE
+      INTEGER,  INTENT (IN) :: nrr_k, dims, ndegen_k(:,:,:)
+      REAL(DP), INTENT (IN) :: irvec_r(3, nrr_k)
+
+      INTEGER,  INTENT (IN) :: nrr_q, ndegen_q(:,:,:)
+      INTEGER,  INTENT (IN) :: irvec_q(3, nrr_q)
+
+      INTEGER,  INTENT (IN) :: nrws
+      REAL(DP), INTENT (IN) :: rws(:, :)
+
+      LOGICAL :: itsopen
+
+      ! Bqu Ank interpolation is not compatible with self-consistency process
+      ! Added by Chao Lian for polaron calculations flow select
+      ! If postprocess is ON, i.e. Bqu interpolation with saved dtau,
+      ! Ank interpolation with saved Amp, and polaron visulation with saved Wannier function cube files,
+      ! then self-consistent process is skipped.
+      IF(.NOT. (interp_Bqu_plrn .or. interp_Ank_plrn .or. cal_psir_plrn)) THEN
+         CALL polaron_scf(nrr_k, ndegen_k, irvec_r, nrr_q, ndegen_q, irvec_q, rws, nrws, dims)
+         IF (io_lvl_plrn == 1) CALL close_buffer(iepfall, 'KEEP')
+         CALL close_buffer(ihamil, 'delete')
+         !!JLB
+         !CLOSE(UNIT=iUmn)
+         !CLOSE(UNIT=iekanu)
+         !!JLB
+      END IF
+
+      IF(interp_Ank_plrn) THEN
+         IF(ionode) WRITE(stdout, "(5x, 'Interpolating the Ank at given k-point set....')")
+         CALL interp_plrn_wf(nrr_k, ndegen_k, irvec_r, dims)
+      END IF
+
+      IF(interp_Bqu_plrn) THEN
+         IF(ionode) THEN
+            WRITE(stdout, "(5x, 'Interpolating the Bqu at given q-point set....')")
+         END IF
+         CALL interp_plrn_bq(nrr_q, ndegen_q, irvec_q, rws, nrws)
+      END IF
+
+      IF(cal_psir_plrn) THEN
+         IF(ionode) WRITE(stdout, "(5x, 'Calculating the real-space distribution of polaron wavefunction....')")
+         IF (scell_mat_plrn) THEN
+            CALL scell_write_real_space_wavefunction()
+         ELSE
+            CALL write_real_space_wavefunction()
+         END IF
+      END IF
+
+
+      !Clean up the allocated arrays, close open files
+      !inquire(unit=iepfall, opened=itsopen)
+      !if (itsopen) CLOSE(iepfall)
+
+      IF (ALLOCATED(is_mirror_k))      DEALLOCATE(is_mirror_k)
+      IF (ALLOCATED(is_mirror_q))      DEALLOCATE(is_mirror_q)
+      IF (ALLOCATED(is_tri_q))         DEALLOCATE(is_tri_q) !JLB
+      IF (ALLOCATED(is_tri_k))         DEALLOCATE(is_tri_k) !JLB
+      IF (ALLOCATED(Hamil))            DEALLOCATE(Hamil)
+      IF (ALLOCATED(eigVec))           DEALLOCATE(eigVec)
+      IF (ALLOCATED(hEigVec))          DEALLOCATE(hEigVec)
+      IF (ALLOCATED(kpg_map))          DEALLOCATE(kpg_map)
+      IF (ALLOCATED(etf_all))          DEALLOCATE(etf_all)
+      IF (ALLOCATED(xkf_all))          DEALLOCATE(xkf_all)
+      IF (ALLOCATED(select_bands_plrn))DEALLOCATE(select_bands_plrn)
+      IF (ALLOCATED(gq_model))         DEALLOCATE(gq_model)
+      !IF (ALLOCATED(Rp_array))         DEALLOCATE(Rp_array)
+
+      WRITE(stdout, '(/5x, "======================== Polaron Timers ===========================")')
+      CALL print_clock('main_prln')
+      CALL print_clock('find_k+q')
+      CALL print_clock('plrn_prepare')
+      CALL print_clock('write_files')
+      CALL print_clock('Bqu_tran')
+      CALL print_clock('Ank_trans')
+      CALL print_clock('cal_E_Form')
+      CALL print_clock('DiagonH')
+
+      CALL print_clock('Setup_H')
+      CALL print_clock('H_alloc')
+      CALL print_clock('read_gmat')
+      CALL print_clock('read_Hmat')
+      CALL print_clock('Write_Hmat')
+      CALL print_clock('HOffDiagTerm')
+      CALL print_clock('HdiagTerm')
+      CALL print_clock( 'cegterg' )
+      CALL print_clock( 'cegterg:init' )
+      CALL print_clock( 'cegterg:diag' )
+      CALL print_clock( 'cegterg:update' )
+      CALL print_clock( 'cegterg:overlap' )
+      CALL print_clock( 'cegterg:last' )
+      CALL print_clock('ik_l2g')
+      CALL print_clock('cal_bqu')
+      CALL print_clock('init_Ank')
+      CALL print_clock('find_EVBM')
+      CALL print_clock('re_omega')
+      CALL print_clock('cal_hpsi')
+      WRITE(stdout, '(5x, "===================================================================")')
+   END SUBROUTINE
+   !
+   SUBROUTINE polaron_scf (nrr_k, ndegen_k, irvec_r, nrr_q, ndegen_q, irvec_q, rws, nrws, dims)
+      !
+      ! Self consistency calculation of polaron wavefunction.
+      ! Rewritten by Chao Lian based on the implementation by Denny Sio.
+      !
+      USE modes,         ONLY : nmodes
+      USE constants_epw, ONLY : ryd2mev, one, ryd2ev, two, zero, twopi
+      USE constants_epw, ONLY : czero, cone, pi, ci, twopi, eps6, eps8, eps5
+      USE epwcom,        ONLY : type_plrn, full_diagon_plrn, lifc, debug_plrn
+      USE epwcom,        ONLY : init_sigma_plrn, init_k0_plrn
+      USE epwcom,        ONLY : nstate_plrn, conv_thr_plrn
+      USE epwcom,        ONLY : mixing_Plrn, init_plrn, niter_plrn, restart_plrn
+      USE epwcom,        ONLY : nkf1, nkf2, nkf3, seed_plrn
+      USE epwcom,        ONLY : nqf1, nqf2, nqf3, r0_plrn
+      USE epwcom,        ONLY : efermi_read, fermi_energy, nbndsub
+      USE epwcom,        ONLY : model_vertex_plrn, time_rev_A_plrn
+      USE epwcom,        ONLY : beta_plrn, Mmn_plrn, recal_Mmn_plrn
+      USE epwcom,        ONLY : model_enband_plrn, model_phfreq_plrn, model_vertex_plrn
+      USE epwcom,        ONLY : omega_LO_plrn, kappa_plrn, m_eff_plrn
+      USE epwcom,        ONLY : scell_mat_plrn, as
+      USE epwcom,        ONLY : init_ntau_plrn
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE elph2,         ONLY : etf, ibndmin, ibndmax
+      USE elph2,         ONLY : nkqf, nkf, nqf, nqtotf, nktotf
+      USE elph2,         ONLY : xkf, wf, xkq, chw
+      USE elph2,         ONLY : cu, cuq
+      USE mp_global,     ONLY : inter_pool_comm
+      USE cell_base,     ONLY : bg, alat
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE poolgathering, ONLY : poolgather2
+      USE ions_base,     ONLY : nat
+      USE mp_world,      ONLY : mpime, world_comm
+      USE epwcom,        ONLY : ethrdg_plrn
+      USE io_var,        ONLY : iunRpscell
+
+
+      IMPLICIT NONE
+
+      INTEGER,  INTENT (IN) :: nrr_k, dims, ndegen_k(:,:,:)
+      REAL(DP), INTENT (IN) :: irvec_r(3, nrr_k)
+
+      INTEGER,  INTENT (IN) :: nrr_q, ndegen_q(:,:,:)
+      INTEGER,  INTENT (IN) :: irvec_q(3, nrr_q)
+
+      INTEGER,  INTENT (IN) :: nrws
+      REAL(DP), INTENT (IN) :: rws(:, :)
+      ! local variables
+      CHARACTER(LEN=256) :: filename, tmpch
+
+      LOGICAL  :: debug, file_exist
+
+      INTEGER  :: inu, iq, ik, ikk, jk, iibnd, jjbnd, ibnd, jbnd, ikq, ik_global, iplrn, ierr, iRp, itau
+      INTEGER  :: iter, icount, ix, iy, iz, start_mode, idos, iatm, indexkn1, indexkn2
+      INTEGER  :: nkf1_p, nkf2_p, nkf3_p, nbnd_plrn_p, nbndsub_p, nPlrn_p, nktotf_p, iqpg, ikpg
+      INTEGER  :: dos_file, wan_func_file, bloch_func_file, bmat_file, dtau_file, itemp, jtemp
+      INTEGER  :: ngrid(1:3)
+
+      COMPLEX(DP),  ALLOCATABLE :: Bmat_save(:,:)
+      COMPLEX(DP),  ALLOCATABLE :: eigvec_wan(:, :), eigvec_wan_save(:, :)
+      COMPLEX(DP),  ALLOCATABLE :: dtau(:, :), dtau_save(:, :), dtau_list(:, :, :)
+
+
+
+      COMPLEX(KIND=dp) :: cufkk ( nbndsub, nbndsub ), cfac(nrr_k, dims, dims), ctemp
+
+      REAL(dp) :: estmteRt(nstate_plrn),  eigVal(nstate_plrn), esterr, eb, EPlrnTot, EPlrnElec, EPlrnPhon, EPlrnBeta, EPlrnDisp
+      REAL(dp) :: xxk(3), xxq(3), shift(3), rtemp, disK, disK_t, prefac, norm, r_cry(1:3)
+      REAL(dp) :: totVal_save, b_vec(1:3), dtau_diff
+
+      ALLOCATE(dtau(nktotf, nmodes), STAT = ierr)
+      ALLOCATE(dtau_save(nktotf, nmodes), STAT = ierr)
+      dtau = czero
+      dtau_save = czero
+      IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating dtau', 1)
+      b_vec(1:3) = (/one/nqf1, one/nqf2, one/nqf3/)
+      debug = debug_plrn
+
+      CALL start_clock('main_prln')
+      !! Gather all the eigenvalues to determine the EBM/VBM,
+
+      CALL start_clock('re_omega')
+      !! Recalculate the frequency, when restart from save g
+      CALL cal_phonon_eigenfreq(nrr_q, irvec_q, ndegen_q, rws, nrws, wf)
+      IF(model_phfreq_plrn) THEN
+         wf = zero
+         wf(nmodes, :) = omega_LO_plrn
+      END IF
+
+
+      CALL stop_clock('re_omega')
+
+
+      !! Initialize Ac(k) based on profile
+      !! TODO: ik_bm should be user-adjustable
+      CALL start_clock('init_Ank')
+      eigVec = czero
+      SELECT CASE (init_plrn)
+         CASE (1)
+            ! If k0 has not been set on input, center gaussian at band edge
+            IF (ALL(init_k0_plrn(:)==1000.d0)) init_k0_plrn = xkf_all(1:3, ik_edge)
+            !
+            WRITE(stdout, '(5x, "Initializing polaron wavefunction using Gaussian wave &
+               &packet with a width of", ES14.6)') init_sigma_plrn
+            WRITE(stdout, '(5x, "centered at k=", 3f14.6)') init_k0_plrn !xkf_all(1:3, ik_edge)
+            CALL init_plrn_gaussian((/zero, zero, zero/), xkf_all, init_k0_plrn, eigVec)
+         CASE (3)
+            ALLOCATE(eigvec_wan(nktotf*nbnd_plrn, nstate_plrn))
+            WRITE(stdout, '(5x, a)') "Initializing the polaron wavefunction with previously saved Amp.plrn file"
+            CALL read_plrn_wf(eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p, 'Amp.plrn', scell_mat_plrn)
+            CALL plrn_eigvec_tran('Wan2Bloch', time_rev_A_plrn, eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nbndsub_p, &
+               nrr_k, ndegen_k, irvec_r, dims, eigVec)
+            DEALLOCATE(eigvec_wan)
+         !JLB
+         CASE (6)
+            WRITE(stdout, '(5x, a, I6)') "Starting from displacements read from file; number of displacements:", init_ntau_plrn
+            !
+            ALLOCATE(dtau_list(init_ntau_plrn, nktotf, nmodes), STAT = ierr)
+            IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating dtau_list', 1)
+            dtau_list = CMPLX(0.d0, 0.d0)
+            !
+            IF (init_ntau_plrn==1) THEN
+               !write (filename, '(A14)') 'dtau_disp.plrn'
+               filename = 'dtau_disp.plrn'
+               CALL read_plrn_dtau(dtau, nqf1, nqf2, nqf3, nqtotf, nmodes, filename, scell_mat_plrn)
+               dtau_list(1, :, :) = dtau(:, :)
+            ELSE
+               DO itau = 1, init_ntau_plrn
+                  write (tmpch,'(I4)') itau
+                  filename=TRIM('dtau_disp.plrn_'//ADJUSTL(tmpch))
+                  CALL read_plrn_dtau(dtau, nqf1, nqf2, nqf3, nqtotf, nmodes, filename, scell_mat_plrn)
+                  dtau_list(itau, :, :) = dtau(:, :)
+               END DO
+            END IF
+            !
+            CALL mp_bcast(dtau_list, meta_ionode_id, world_comm)
+            ! Initialize Ank wavefunction for iterative diagonalization
+            ! Gaussian
+            ! If k0 has not been set on input, center gaussian at band edge
+            IF (ALL(init_k0_plrn(:)==1000.d0)) init_k0_plrn = xkf_all(1:3, ik_edge)
+            WRITE(stdout, '(5x, "Initializing polaron wavefunction using Gaussian wave &
+            &packet with the width of", f15.7)') init_sigma_plrn
+            WRITE(stdout, '(5x, "centered at k=", 3f15.7)') init_k0_plrn !xkf_all(1:3, ik_edge)
+            CALL init_plrn_gaussian((/zero, zero, zero/), xkf_all, init_k0_plrn, eigVec)
+            CALL norm_plrn_wf(eigVec, REAL(nktotf, DP))
+         CASE DEFAULT
+            CALL errore('polaron_scf','init_plrn not implemented!', 1)
+      END SELECT
+
+      !! Only keep the coefficients in lowest/highest band,
+      !! since the electron/hole localized at this band will be more stable.
+      IF(init_plrn <= 2) THEN
+         DO ik = 1, nktotf
+            DO ibnd = 1, nbnd_plrn
+               indexkn1 = (ik-1)*nbnd_plrn + ibnd
+               IF(select_bands_plrn(ibnd) /= band_pos)  &
+                  eigVec(indexkn1, 1:nstate_plrn) = czero
+            END DO
+         END DO
+         CALL norm_plrn_wf(eigVec, REAL(nktotf, DP))
+      END IF
+      CALL stop_clock('init_Ank')
+
+      IF(debug_plrn) THEN
+         IF(ALLOCATED(eigvec_wan)) DEALLOCATE(eigvec_wan)
+         ALLOCATE(eigvec_wan(nktotf*nbnd_plrn, nstate_plrn))
+         DO ik = 1, nktotf
+            DO ibnd = 1, nbnd_plrn
+               eigvec_wan(ik + (ibnd-1)*nktotf, 1:nstate_plrn) &
+                  =  eigVec((ik - 1)*nbnd_plrn + ibnd, 1:nstate_plrn)
+            END DO
+         END DO
+         DEALLOCATE(eigvec_wan)
+      END IF
+
+
+      WRITE(stdout, '(5x, "Starting the SCF cycles")')
+      IF (full_diagon_plrn) THEN
+         WRITE(stdout, '(5x, a)') "Using serial direct diagonalization"
+      ELSE
+         WRITE(stdout, '(5x, a)') "Using parallel iterative diagonalization"
+         WRITE(stdout, '(5x, "Diagonalizing polaron Hamiltonian with a threshold of ", ES18.6)') ethrdg_plrn
+         WRITE(stdout, '(5x, "Please check the results are convergent with this value")')
+      END IF
+
+      WRITE(stdout, '(/5x, a)') "Starting the self-consistent process"
+      WRITE(stdout, '( 5x, a)') REPEAT('-',80)
+!      WRITE(stdout, '(14x, 60a15)') " phonon/eV    electron/eV     Formation/eV    esterr      Eigenvalues/eV", &
+!                                   "         i           j             k"
+      WRITE(stdout, '(5x, " iter", 60a15)') "  Eigval/eV", "Phonon/eV", "Electron/eV", & 
+                                            "Formation/eV", "Error/eV"
+
+      ALLOCATE(Bmat(nqtotf, nmodes))
+      ALLOCATE(eigvec_wan(nktotf*nbnd_plrn, nstate_plrn))
+      ALLOCATE(eigvec_wan_save(nktotf*nbnd_plrn, nstate_plrn))
+
+      !JLB
+      IF (scell_mat_plrn) THEN
+         CALL read_Rp_in_S()
+      END IF
+      !JLB
+
+      !JLB: possibility of multiple displacements read from file, to calculate polaron energy landscape.
+      !     Calculate and print the energies; .plrn files written to disk for last calculation only.
+      DO itau = 1, init_ntau_plrn ! ntau_plrn=1 by default
+
+         IF (init_plrn==6) dtau(:, :) = dtau_list(itau, :, :)
+
+         eigvec_wan = czero
+         eigvec_wan_save = czero
+
+         estmteRt = 1E3
+         esterr = 1E5
+         DO iter = 1, niter_plrn
+            ! Enforce the relation A_k = A*_{G-k} and normalize |A| = 1
+            ! Calculating $$ B_{\bq\nu} = \frac{1}{\omega_{\bq,\nu} N_p} \sum_\bk A^\dagger_{\bk+\bq} g_\nu(\bk,\bq) A_\bk $$
+            CALL start_clock('cal_bqu')
+            ! JLB
+            IF(init_plrn == 6 .AND. iter==1) THEN
+               Bmat = czero
+               IF (scell_mat_plrn) THEN
+                  CALL scell_plrn_bmat_tran('Dtau2Bmat', .true., dtau, nqtotf, nRp, Rp, nrr_q, ndegen_q, irvec_q, rws, nrws, Bmat)
+               ELSE
+                  CALL plrn_bmat_tran('Dtau2Bmat', .true., dtau, nqf1, nqf2, nqf3, nrr_q, ndegen_q, irvec_q, rws, nrws, Bmat)
+               END IF
+            ELSE
+               CALL build_plrn_bmat(Bmat, iter==1)
+               !
+               IF (scell_mat_plrn) THEN
+                  CALL scell_plrn_bmat_tran('Bmat2Dtau', .true., Bmat, nqtotf, nRp, Rp, nrr_q, ndegen_q, irvec_q, rws, nrws, dtau)
+               ELSE
+                  CALL plrn_bmat_tran('Bmat2Dtau', .true., Bmat, nqf1, nqf2, nqf3, nrr_q, ndegen_q, irvec_q, rws, nrws, dtau)
+               END IF
+               !
+            END IF
+
+            dtau_diff = MAXVAL(ABS(REAL(dtau - dtau_save)))
+            esterr = dtau_diff
+            IF(dtau_diff < conv_thr_plrn .and. iter > 1) THEN
+               IF(MAXVAL(ABS(REAL(dtau))) > alat/2.d0) THEN
+                    CALL errore("polaron_scf","Non-physical solution, check initial guess and convergence.", 1) 
+               END IF
+               ! converged, write the final value of eigenvalue
+               WRITE(stdout,'(5x,a)') REPEAT('-',80)
+               WRITE(stdout, '(5x,a,f10.6,a)' )  'End of self-consistent cycle'
+               EXIT
+            ELSE
+               dtau_save = dtau
+               !CALL plrn_bmat_tran('Dtau2Bmat', .true., dtau, nqf1, nqf2, nqf3, nrr_q, ndegen_q, irvec_q, rws, nrws, Bmat)
+            END IF
+            !
+            CALL stop_clock('cal_bqu')
+
+            IF(debug_plrn) THEN
+               IF(ALLOCATED(Bmat_save)) DEALLOCATE(Bmat_save)
+               ALLOCATE(Bmat_save(nmodes, nqtotf))
+               Bmat_save = czero
+               DO inu = 1, nmodes
+                  DO iq = 1, nqtotf
+                     Bmat_save(inu, iq) = Bmat(iq, inu)*wf(inu, iq)
+                  END DO
+               END DO
+               Bmat_save = czero
+            END IF
+            !
+            CALL start_clock('Setup_H')
+            CALL build_plrn_hamil(Bmat, Bmat_save, iter)
+            CALL stop_clock('Setup_H')
+
+            CALL start_clock('DiagonH')
+            ! For hole polaron (type_plrn = 1),
+            ! we need the highest eigenvalues instead of the lowest eigenvalues
+            ! To use KS_solver, which only gives the lowest eigenvalues,
+            ! we multiply -1 to the Hamiltonian to get the lowest eigenvalues
+            IF (full_diagon_plrn) THEN
+               ! Diagonalize Hamiltonian with Serial LAPACK subroutine
+               ! Used for testing or robust benchmark
+               CALL diag_serial(estmteRt, eigVec)
+            ELSE
+               ! Diagonalize Hamiltonian with Davidson Solver
+               CALL diag_parallel(estmteRt, eigVec)
+            END IF
+            CALL stop_clock('DiagonH')
+            !
+            ! Reverse the eigenvalues if it is the hole polaron
+            estmteRt(1:nstate_plrn) = (-type_plrn) * estmteRt(1:nstate_plrn)
+
+            ! enforce the time-reversal symmetry: A^T_k = A_k + A^*_{-k}
+            IF(time_rev_A_plrn) CALL check_time_rev_sym(eigVec)
+            CALL norm_plrn_wf(eigVec, REAL(nktotf, dp))
+
+            eigVec = (- type_plrn) * eigVec
+            !esterr = MAXVAL(ABS(estmteRt(1:nstate_plrn) - eigVal(1:nstate_plrn)))
+
+            IF(debug_plrn) THEN
+               IF(ALLOCATED(eigvec_wan)) DEALLOCATE(eigvec_wan)
+               ALLOCATE(eigvec_wan(nktotf*nbnd_plrn, nstate_plrn))
+               DO ik = 1, nktotf
+                  DO ibnd = 1, nbnd_plrn
+                     eigvec_wan(ik + (ibnd-1)*nktotf, 1:nstate_plrn) &
+                        =  eigVec((ik - 1)*nbnd_plrn + ibnd, 1:nstate_plrn)
+                  END DO
+               END DO
+               DEALLOCATE(eigvec_wan)
+            END IF
+
+            CALL start_clock('cal_E_Form')
+            CALL calc_form_energy(EPlrnPhon, EPlrnElec, EPlrnBeta)
+            CALL stop_clock('cal_E_Form')
+
+            ! TODO : use exact number instead of 20 in 20e15.7
+            r_cry(1:3) = IMAG(LOG(berry_phase(1:3) * EXP(- twopi * ci * r0_plrn(1:3))))/twopi
+            r_cry(1:3) = r_cry(1:3) - NINT(r_cry(1:3))
+            WRITE(stdout, '(5x, i5, 60e15.4)') iter, &
+               estmteRt(1:nstate_plrn)*ryd2ev, EPlrnPhon*ryd2ev, &
+               - EPlrnElec*ryd2ev, (EPlrnElec + EPlrnPhon)*ryd2ev, &
+               esterr
+            eigVal = estmteRt
+            totVal_save = EPlrnElec + EPlrnPhon
+         END DO
+
+         !! Calculate and write the energies
+         WRITE(stdout, '(5x, a, 50f16.7)') '      Eigenvalue (eV): ', eigVal*ryd2ev
+         WRITE(stdout, '(5x, a, f16.7)')   '     Phonon part (eV): ', EPlrnPhon*ryd2ev
+         WRITE(stdout, '(5x, a, f16.7)')   '   Electron part (eV): ', EPlrnElec*ryd2ev
+         IF (init_plrn==6) THEN
+            WRITE(stdout, '(5x, a, f16.7)') 'Formation Energy at this \dtau (eV): ', ((- type_plrn)*eigval - EPlrnPhon)*ryd2ev
+         ELSE 
+            WRITE(stdout, '(5x, a, f16.7)')   'Formation Energy (eV): ', (EPlrnElec + EPlrnPhon)*ryd2ev
+         END IF
+
+      END DO ! init_ntau_plrn
+
+
+      !! Calculate and write Density of State of Bqnu and Ank
+      WRITE(stdout, '(5x, a)') "Calculating density of states to save in dos.plrn"
+      CALL calc_den_of_state(eigVec, Bmat)
+      !! Do Bloch to Wannier transform, with U matrix
+      CALL start_clock('Ank_trans')
+      WRITE(stdout, '(5x, a)') "Generating the polaron wavefunction in Wannier basis to save in Amp.plrn"
+
+      IF(ALLOCATED(eigvec_wan)) DEALLOCATE(eigvec_wan)
+      ALLOCATE(eigvec_wan(nbndsub * nktotf, nstate_plrn), STAT = ierr)
+
+      IF (ierr /= 0) CALL errore('polaron_scf', 'Error allocating eigvec_wan', 1)
+      eigvec_wan = czero
+      ! .true.
+      IF (scell_mat_plrn) THEN
+         CALL scell_plrn_eigvec_tran('Bloch2Wan',.true., eigVec, nktotf, nRp, Rp, nbndsub, nrr_k, &
+                 ndegen_k, irvec_r, dims, eigvec_wan)
+      ELSE
+         CALL plrn_eigvec_tran('Bloch2Wan',.true., eigVec, nkf1, nkf2, nkf3, nbndsub, nrr_k, &
+            ndegen_k, irvec_r, dims, eigvec_wan)
+      END IF
+      CALL stop_clock('Ank_trans')
+
+      !! Calculate displacements of ions dtau, which is B matrix in Wannier basis
+      CALL start_clock('Bqu_tran')
+      dtau = czero
+      WRITE(stdout, '(5x, a)') "Generating the ionic displacements to save in dtau.plrn and dtau.plrn.xsf"
+      IF (scell_mat_plrn) THEN
+         CALL scell_plrn_bmat_tran('Bmat2Dtau', .true., Bmat, nqtotf, nRp, Rp, nrr_q, ndegen_q, irvec_q, rws, nrws, dtau)
+      ELSE
+         CALL plrn_bmat_tran('Bmat2Dtau', .true., Bmat, nqf1, nqf2, nqf3, nrr_q, ndegen_q, irvec_q, rws, nrws, dtau)
+      END IF
+      CALL stop_clock('Bqu_tran')
+      CALL start_clock('write_files')
+
+      IF(ionode) THEN
+         !! Write Amp in Wannier basis
+         CALL write_plrn_wf(eigvec_wan, 'Amp.plrn')
+
+         !! Write Ank in Bloch basis
+         CALL write_plrn_wf(eigvec,     'Ank.plrn',  etf_all)
+
+         !! Write Bqnu
+         CALL write_plrn_bmat(Bmat, 'Bmat.plrn', wf)
+
+         !! Write dtau
+         CALL write_plrn_bmat(dtau, 'dtau.plrn')
+
+         !! Write dtau in a user-friendly format for visulization
+         IF (scell_mat_plrn) THEN
+            CALL scell_write_plrn_dtau_xsf(dtau, nqtotf, nRp, Rp, as, 'dtau.plrn.xsf')
+         ELSE
+            CALL write_plrn_dtau_xsf(dtau, nqf1, nqf2, nqf3, 'dtau.plrn.xsf')
+         END IF
+      END IF
+      CALL stop_clock('write_files')
+
+      ! clean up
+      DEALLOCATE(dtau)
+      DEALLOCATE(eigvec_wan)
+      DEALLOCATE(Bmat)
+      IF(ALLOCATED(Rp)) DEALLOCATE(Rp)
+
+      CALL stop_clock('main_prln')
+   END SUBROUTINE
+
+   !! Require etf_all to be properly initialized
+   SUBROUTINE calc_form_energy(EPlrnPhon, EPlrnElec, EPlrnBeta)
+      USE constants_epw,   ONLY : zero, czero, twopi, ci, two
+      USE modes,           ONLY : nmodes
+      USE elph2,           ONLY : xqf, wf, nqtotf, nktotf, nkf
+      USE epwcom,          ONLY : type_plrn, nstate_plrn, beta_plrn
+      USE mp,              ONLY : mp_sum
+      USE mp_global,       ONLY : inter_pool_comm
+
+      IMPLICIT NONE
+
+      REAL(dp), INTENT(OUT) :: EPlrnPhon, EPlrnElec, EPlrnBeta
+      REAL(dp):: prefix
+      INTEGER :: start_mode, inu, ibnd, jbnd, iplrn
+      INTEGER :: indexkn1, indexkn2, indexkn3
+      INTEGER :: ik, iq, ik_global, iq_global, idir
+      INTEGER :: ikmbi, ikpbi
+      COMPLEX(dp) :: Q_i, Mmn(2), ctemp(2)
+
+      !! Based on Eq. 41 of Ref. 2:
+      !! E_{f,ph} = 1/N_p \sum_{q\nu}|B_{q\nu}|^2\hbar\omega_{q\nu}
+      !! iq -> q, nqtotf -> N_p
+      !! Bmat(iq, inu) -> B_{q\nu}
+      !! wf(inu, iq) -> \hbar\omega_{q\nu}
+      EPlrnPhon = zero
+      DO iq = 1, nkf
+         iq_global = ikqLocal2Global(iq, nqtotf)
+         !JLB - Swapped indices!
+         !JLB: I think it would be better to discard modes by looking at the frequencies, i.e. discard negative or zero frequency modes.
+         IF(isGVec(xqf(1:3, iq_global))) THEN
+         !IF(isGVec(xqf(iq_global, 1:3))) THEN
+         !JLB
+            start_mode = 4
+         ELSE
+            start_mode = 1
+         END IF
+         DO inu = start_mode, nmodes
+            EPlrnPhon = EPlrnPhon - ABS(Bmat(iq_global, inu))**2 * (wf(inu, iq_global)/nqtotf)
+         END DO
+      END DO
+      CALL mp_sum(EPlrnPhon, inter_pool_comm)
+
+      !! E_{f,el} = 1/N_p \sum_{nk}|A_{nk}|^2(\epsilon_{nk}-\epsilon_{F})
+      !! indexkn1 -> nk, nktotf -> N_p
+      !! eigVec(indexkn1, iplrn) -> A_{nk}
+      !! etf_all(select_bands_plrn(ibnd), ik) - ef -> \epsilon_{nk}-\epsilon_{F}
+      EPlrnElec = zero
+      ! TODO: what should we do in iplrn
+      DO iplrn = 1, nstate_plrn
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nqtotf)
+            DO ibnd = 1, nbnd_plrn
+               indexkn1 = (ik_global - 1) * nbnd_plrn + ibnd
+               EPlrnElec = EPlrnElec - type_plrn * ABS(eigVec(indexkn1, iplrn))**2/nktotf*&
+                  etf_all(select_bands_plrn(ibnd), ik_global)
+            END DO
+         END DO
+      END DO
+      CALL mp_sum(EPlrnElec, inter_pool_comm)
+
+      EPlrnBeta = zero
+   END SUBROUTINE
+   !
+   !! type_plrn denotes whether electron polaron (-1) or hole polaron (+1)
+   !! Determine the Fermi energy, read from the input or calculated from band structure
+   SUBROUTINE find_band_extreme(type_plrn, etf_all, ik_bm, band_pos, efermi)
+      USE constants_epw, ONLY : zero, ryd2ev
+      USE epwcom,        ONLY : efermi_read, fermi_energy
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE elph2,         ONLY : nkqf, nkf, nqf, nqtotf, nktotf
+      USE mp,            ONLY : mp_max, mp_min, mp_sum
+      USE mp_global,     ONLY : inter_pool_comm, npool, my_pool_id
+
+      IMPLICIT NONE
+
+      INTEGER,  INTENT(IN)  :: type_plrn
+      REAL(DP), INTENT(IN)  :: etf_all(:,:)
+
+      INTEGER,  INTENT(OUT) :: ik_bm, band_pos
+      REAL(DP), INTENT(OUT) :: efermi
+
+      REAL(DP) :: band_edge, extreme_local(npool)
+      INTEGER :: ik, ik_global, k_extreme_local(npool), ipool(1)
+
+
+      !! type_plrn denotes whether electron polaron (-1) or hole polaron (+1)
+      IF ( type_plrn .eq. -1 ) THEN
+         band_pos = select_bands_plrn(1)
+      ELSE IF ( type_plrn .eq. 1 ) THEN
+         band_pos = select_bands_plrn(nbnd_plrn)
+      END IF
+      WRITE(stdout, '(5x, "The band extremes are at band ",  i0)') band_pos
+
+      !! Determine the Fermi energy, read from the input or calculated from band structure
+      ! = 1E4*(-type_plrn)
+      ik_bm = 0
+      k_extreme_local = 0
+      extreme_local = zero
+      IF(efermi_read) THEN
+         efermi = fermi_energy
+         WRITE(stdout, '(5x, "Polaron Reference energy (VBM or CBM) is read from the input file: ",&
+            &f16.6, " eV.")') efermi * ryd2ev
+      ELSE
+         IF(type_plrn == 1) THEN
+            efermi = -1E5
+         ELSE IF (type_plrn == -1) THEN
+            efermi =  1E5
+         ELSE
+            CALL errore('','Wrong type_plrn, should be 1 or -1', 1)
+         END IF
+
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            band_edge = etf_all(band_pos, ik_global)
+
+            IF (type_plrn == 1) THEN
+               !! For hole polaron (type_plrn = 1), find the highest eigenvalue
+               IF (band_edge > efermi) THEN
+                  efermi = band_edge
+                  ik_bm = ik_global
+               END IF
+            ELSE IF (type_plrn == -1) THEN
+               !! For electron polaron (type_plrn = -1), find the lowest eigenvalue
+               IF (band_edge < efermi) THEN
+                  efermi = band_edge
+                  ik_bm = ik_global
+               END IF
+            END IF
+         END DO
+
+         k_extreme_local(my_pool_id + 1) = ik_bm
+         extreme_local(my_pool_id + 1) = efermi
+         CALL mp_sum(k_extreme_local, inter_pool_comm)
+         CALL mp_sum(extreme_local, inter_pool_comm)
+
+         IF(type_plrn == 1) THEN
+            ipool = MAXLOC(extreme_local)
+            ik_bm = k_extreme_local(ipool(1))
+            efermi = MAXVAL(extreme_local)
+         ELSE IF (type_plrn == -1) THEN
+            ipool = MINLOC(extreme_local)
+            ik_bm = k_extreme_local(ipool(1))
+            efermi = MINVAL(extreme_local)
+         END IF
+      END IF
+   END SUBROUTINE
+   !
+   !! Gather all the eigenvalues to determine the EBM/VBM,
+   !! and calculate the density state of Ank and Bqnu
+   SUBROUTINE gather_band_eigenvalues(etf, etf_all)
+      USE epwcom,        ONLY : nbndsub
+      USE elph2,         ONLY : nkqf, nkf, nqf, nqtotf, nktotf
+      USE elph2,         ONLY : xkf, xqf, wf, xkq, chw
+      USE constants_epw, ONLY : zero
+      USE poolgathering, ONLY : poolgather2
+
+      IMPLICIT NONE
+
+      REAL(DP),     INTENT(IN)  :: etf(:,:)
+      REAL(DP),     INTENT(OUT) :: etf_all(:,:)
+
+      INTEGER :: ierr
+      REAL(DP),     ALLOCATABLE :: rtmp2(:,:)
+
+      ALLOCATE(rtmp2(nbndsub, nktotf*2), STAT = ierr)
+      IF (ierr /= 0) CALL errore('gather_band_eigenvalues', 'Error allocating rtmp2', 1)
+      rtmp2 = zero
+
+      CALL poolgather2 ( nbndsub, nktotf*2, nkqf, etf, rtmp2  )
+      etf_all(1:nbndsub, 1:nktotf) = rtmp2(1:nbndsub, 1:nktotf*2:2)
+
+      DEALLOCATE(rtmp2)
+   END SUBROUTINE
+   !
+   ! Calculate the phonon eigen frequencies. This is needed when restarting the polaron
+   ! calculation with recalculating el-ph vertex
+   SUBROUTINE cal_phonon_eigenfreq(nrr_q, irvec_q, ndegen_q, rws, nrws, wf)
+      USE modes,         ONLY : nmodes
+      USE elph2,         ONLY : xqf, xkq, chw
+      USE constants_epw, ONLY : zero, eps8, czero
+      USE wan2bloch,     ONLY : dynwan2bloch, dynifc2blochf
+      USE elph2,         ONLY : nkqf, nkf, nqf, nqtotf, nktotf
+      USE epwcom,        ONLY : type_plrn, full_diagon_plrn, lifc
+      USE io_global,     ONLY : ionode, stdout
+
+      IMPLICIT NONE
+
+      INTEGER,  INTENT (IN) :: nrr_q, ndegen_q(:,:,:)
+      INTEGER,  INTENT (IN) :: irvec_q(3, nrr_q)
+      INTEGER,  INTENT (IN) :: nrws
+      REAL(DP), INTENT (IN) :: rws(:, :)
+
+      REAL(DP), INTENT (OUT) :: wf(:, :)
+
+      COMPLEX(DP) :: uf(nmodes, nmodes)
+      REAL(DP)    :: w2(nmodes)
+
+      INTEGER  :: inu, ierr, iq
+      REAL(DP) :: xxq(3)
+      LOGICAL  :: mirror_q
+
+      uf = czero
+      w2 = zero
+
+      !TODO: make this part parallel over q
+      wf = zero
+      DO iq = 1, nqtotf
+         ! iq -> q
+         xxq = xqf(1:3, iq)
+         IF (.NOT. lifc) THEN
+            CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, uf, w2, is_mirror_q(iq))
+         ELSE
+            CALL dynifc2blochf(nmodes, rws, nrws, xxq, uf, w2, is_mirror_q(iq))
+         ENDIF
+         DO inu = 1, nmodes
+            IF (w2(inu) > -eps8) THEN
+               wf(inu, iq) =  DSQRT(ABS(w2(inu)))
+            ELSE
+               !JLB
+               IF (ionode) THEN
+                  WRITE(stdout, '(5x, "WARNING: Imaginary frequency mode ",&
+                  &I6, " at iq=", I6)') inu, iq
+               END IF
+               wf(inu, iq) = 0.d0
+               !wf(inu, iq) = -DSQRT(ABS(w2(inu)))
+               !JLB
+            ENDIF
+         END DO
+      END DO
+   END SUBROUTINE
+   !
+   SUBROUTINE init_plrn_random(eigVec)
+      USE elph2, ONLY : nktotf
+      USE constants_epw, ONLY : ci, cone
+      USE epwcom, ONLY : nstate_plrn
+
+      IMPLICIT NONE
+      COMPLEX(dp), INTENT(OUT) :: eigVec(:, :)
+      REAL(DP),     ALLOCATABLE :: rmat_tmp(:, :)
+
+      CALL RANDOM_SEED()
+      ALLOCATE(rmat_tmp(1:nktotf*nbnd_plrn, 1:nstate_plrn))
+      CALL RANDOM_NUMBER(rmat_tmp)
+      eigVec(1:nktotf*nbnd_plrn, 1:nstate_plrn) = cone*rmat_tmp(1:nktotf*nbnd_plrn, 1:nstate_plrn)
+      CALL RANDOM_NUMBER(rmat_tmp)
+      eigVec(1:nktotf*nbnd_plrn, 1:nstate_plrn) = eigVec(1:nktotf*nbnd_plrn, 1:nstate_plrn) + &
+                                                 &ci*rmat_tmp(1:nktotf*nbnd_plrn, 1:nstate_plrn)
+      DEALLOCATE(rmat_tmp)
+   END SUBROUTINE
+   !
+   SUBROUTINE init_plrn_gaussian(r0, xkf_all, k0, eigVec)
+      !!
+      !! Initialize Ank coefficients with a Gaussian lineshape
+      !!
+      USE constants_epw, ONLY : czero, cone, ci, twopi, one, zero
+      USE epwcom, ONLY : nstate_plrn, init_sigma_plrn
+      USE elph2, ONLY : nktotf
+      USE cell_base, ONLY : bg, alat
+
+      IMPLICIT NONE
+      REAL(dp), INTENT(IN) :: r0(3), k0(3), xkf_all(:, :)
+      COMPLEX(dp), INTENT(OUT) :: eigVec(:, :)
+
+
+      INTEGER :: ibnd, ik, iplrn, ix, iy, iz, indexkn1, indexkn2, ishift
+      REAL(dp) :: qcart(3), xxq(3), shift(3), disK
+      COMPLEX(dp) :: ctemp
+
+      ! Calculating $$ B_{\bq\nu} = \frac{1}{\omega_{\bq,\nu} N_p} \sum_\bk A^\dagger_{\bk+\bq} g_\nu(\bk,\bq) A_\bk $$
+      ! \sum_\bk in local 1 to nkf first, then a inter pool sum
+      ! eq to code: k -> ik, q -> iq, \nu -> inu
+      ! g_\nu(\bk,\bq) -> epfall(:,:, inu, ik, iq)
+      ! \omega_{\bq,\nu} -> wf(inu, iq)
+      ! A_\bk -> eigVec(ik,:), A_{\bk+\bq} -> eigVec(ikq,:)
+      ! B_{\bq\nu} -> Bmat(iq, inu)
+      ! Whole equation translate to: Bmat(iq, inu) = one/(wf(inu, iq)*nqtotf) \sum_\bk conj(eigVec(ikq,:)) * epfall(:,:, inu, ik, iq) * eigVec(ik,:)
+      ! call cal_Bmat(eigVec, wf, kpg_map, ikq_all, epfall, Bmat)
+      DO ik = 1, nktotf
+         xxq = xkf_all(1:3, ik) - (k0(:)-INT(k0(:))) ! shift k0 to 1BZ
+         CALL dgemv('n', 3, 3, one, bg, 3, xxq, 1, zero, qcart, 1)
+         ctemp = EXP( -ci* twopi * DOT_PRODUCT( qcart, r0 ))
+         disK = -1
+         ! Ensure periodicity of Ank checking distance to other equivalent BZ points
+         DO ishift = 1, 27
+            shift(1:3) = REAL(index_shift(ishift), KIND=DP)
+            CALL dgemv('n', 3, 3, one, bg, 3, xxq+shift, 1, zero, qcart, 1)
+            disK = MAX(disK, EXP(-init_sigma_plrn * NORM2(qcart)*twopi/alat )) ! for sigma to be in bohr
+            !disK = MAX(disK, EXP(-init_sigma_plrn * NORM2(xxq + shift)))
+         END DO
+         !
+         DO ibnd = 1, nbnd_plrn
+            indexkn1 = (ik - 1)*nbnd_plrn + ibnd
+            eigVec(indexkn1, :) = CONE * disK * ctemp
+         END DO
+      END DO
+   END SUBROUTINE
+
+   SUBROUTINE build_plrn_bmat(Bmat, first)
+      USE elph2,         ONLY : nkf, nqtotf, wf, xqf, nktotf, etf
+      USE epwcom,        ONLY : model_vertex_plrn, nbndsub, debug_plrn
+      USE epwcom,        ONLY : nstate_plrn, mixing_Plrn, type_plrn
+      USE epwcom,        ONLY : g_start_energy_plrn, g_end_energy_plrn, g_start_band_plrn
+      USE epwcom,        ONLY : model_enband_plrn, model_phfreq_plrn, model_vertex_plrn
+      USE epwcom,        ONLY : omega_LO_plrn, kappa_plrn, m_eff_plrn
+      USE epwcom,        ONLY : io_lvl_plrn
+      USE constants_epw, ONLY : czero, one, two, zero, cone, eps2, eps8 !JLB
+      USE mp_world,      ONLY : mpime, world_comm
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE modes,         ONLY : nmodes
+
+      IMPLICIT NONE
+
+      COMPLEX(DP), INTENT(OUT) :: Bmat(:, :)
+      LOGICAL, INTENT(IN) :: first
+      INTEGER :: iq, ik, ikq, ik_global, ibnd, jbnd, iplrn, indexkn1, indexkn2, indexkn3
+      INTEGER :: start_mode, inu, iqpg
+      COMPLEX(DP) :: prefac, ctemp
+      !JLB
+      INTEGER :: jnu, ndegen(nmodes)
+      COMPLEX(DP) :: Bmat_tmp(nmodes)
+      REAL(DP) :: eig
+
+      Bmat = czero
+      !Bmat_comp = czero
+      !REWIND(iepfall)
+      DO iq = 1, nqtotf
+         IF(model_vertex_plrn) THEN
+            epf = czero
+            epf(1, 1, nmodes, 1:nkf) = gq_model(iq)
+         ELSE
+            IF(io_lvl_plrn == 0) THEN
+               epf(:, :, :, :) = epfall(:, :, :, :, iq)
+            ELSE IF (io_lvl_plrn == 1) THEN
+               CALL get_buffer(epf, lword_g, iepfall, iq)
+            END IF
+            !READ(iepfall) epf(1:nbnd_plrn, 1:nbnd_plrn, 1:nmodes, 1:nkf)
+         END IF
+         IF(test_tags_plrn(1)) THEN
+            epf = 1E-6
+            epf(:, :, nmodes, :) = 2E-2
+         ELSE IF (test_tags_plrn(2)) THEN
+            epf = ABS(epf)
+         ELSE IF (test_tags_plrn(3)) THEN
+             IF(NORM2(xqf(:,iq)) > 1E-5) epf(:, :, nmodes, :) = 0.005/NORM2(xqf(:,iq))
+         END IF
+         !
+         ! energy cutoff for g
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            DO ibnd = 1, nbnd_g_plrn
+               eig = etf_all(ibnd + g_start_band_plrn - 1, ik_global)
+               IF (eig < g_start_energy_plrn .OR. eig > g_end_energy_plrn) THEN
+                  !print *, "triggered"
+                   epf(ibnd, :, :, ik) = czero
+                   epf(:, ibnd, :, ik) = czero
+               END IF
+            END DO
+         END DO
+         !
+         iqpg = kpg_map(iq)
+         ! if iq is the gamma point, the first three modes should be
+         ! dropped because wf(q=0, 1:3) will be zero
+         IF (isGVec(xqf(1:3, iq))) THEN
+            start_mode = 4
+         ELSE
+            start_mode = 1
+         ENDIF
+         !
+         IF( iq > iqpg ) THEN
+            DO inu = start_mode, nmodes!
+               ! Enforce the relation B_q = B*_{G-q}
+               !JLB
+               Bmat(iq, inu) = CONJG(Bmat(iqpg, inu))
+               !JLB
+               !Bmat(iq, inu)   = cal_Bmat(iq, inu)
+               !Bmat(iqpg, inu) = CONJG(Bmat(iq, inu))
+            END DO
+         !ELSE IF (iq == iqpg) THEN
+         !   DO inu = start_mode, nmodes!
+         !      ! Enforce the relation B_q = B*_{G-q}
+         !      Bmat(iq, inu)   = REAL(cal_Bmat(iq, inu))
+         !   END DO
+         ELSE
+            DO inu = start_mode, nmodes
+               Bmat(iq, inu)   = cal_Bmat(iq, inu)
+            END DO
+         !JLB
+         END IF
+         !TODO: to be consistent with Denny, whether this is correct?
+         ! IF ( ABS(wf(1, iq)) < eps2 ) Bmat(iq, 1) = czero
+      END DO
+      ! cal_Bmat only sum over local k, so we have to do mp_sum
+      CALL mp_sum( Bmat, inter_pool_comm )
+   END SUBROUTINE
+
+   SUBROUTINE build_plrn_hamil(Bmat, Bmat_save, iter)
+      USE elph2,         ONLY : nkf, nqtotf, wf, xqf, nktotf, etf
+      USE epwcom,        ONLY : model_vertex_plrn, nbndsub
+      USE epwcom,        ONLY : nstate_plrn, mixing_Plrn, type_plrn
+      USE epwcom,        ONLY : nhblock_plrn, r0_plrn, beta_plrn
+      USE epwcom,        ONLY : nqf1, nqf2, nqf3
+      USE epwcom,        ONLY : g_start_energy_plrn, g_end_energy_plrn, g_start_band_plrn
+      USE epwcom,        ONLY : io_lvl_plrn
+      USE constants_epw, ONLY : czero, one, two, zero, cone, eps2, eps8, twopi, ci
+      USE mp_world,      ONLY : mpime, world_comm
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE modes,         ONLY : nmodes
+      USE cell_base,     ONLY : at, alat
+
+      IMPLICIT NONE
+
+      !COMPLEX(DP), INTENT(OUT) :: Hamiltonian(:)
+      COMPLEX(DP), INTENT(IN) :: Bmat_save(:,:), Bmat(:,:)
+      INTEGER, INTENT(IN) :: iter
+      INTEGER :: iq, ik, ikq, ik_global, ibnd, jbnd, iplrn
+      INTEGER :: indexkn1, indexkn2, indexkn3, indexkn4, indexkn5
+      INTEGER :: start_mode, inu, iqpg, index_blk, index_loc, idir, jdir, ialpha
+      INTEGER :: ibpi, ibmi, ibpj, ibmj, ivec, ibm, ikpbi, ikmbi, ikpbj, ikmbj
+      COMPLEX(DP) :: prefac, ctemp, Q_i, Q_j, Mmn(1:4)
+      COMPLEX(DP), ALLOCATABLE :: Bmat_comp(:,:), Hamil_tmp(:, :)
+      LOGICAL, ALLOCATABLE :: saved(:)
+
+      REAL(DP) :: F_mat(1:3,1:3), b_vec(1:3), eta(1:3), a_i, a_j, eig
+
+      test_tags_plrn(1) = .false.
+      test_tags_plrn(2) = .false.
+      test_tags_plrn(3) = .false.
+
+
+
+      ! To avoid the zero phonon frequency
+      ! IF(wf(1, iq) < eps2) Bmat(iq, 1) = czero
+
+      !IF(.NOT. first) Bmat = mixing_Plrn*Bmat + (1 - mixing_Plrn) * Bmat_save
+
+      ! allocate(saved(1:nkf*nbnd_plrn))
+      ! saved(1:nkf*nbnd_plrn) = .false.
+      ! call start_clock('H_alloc')
+
+
+      ! if(.not. mem_save_h) then
+      ! allocate(Hamil_tmp(1:lword_h, blocksize))
+      ! Hamil_tmp = czero
+      ! end if
+      ! call stop_clock('H_alloc')
+
+      !
+      ! Calculate the Hamiltonian with Bq $$H_{n\bk,n'\bk'} = \delta_{n\bk,n'\bk'}\varepsilon_{n\bk} -\frac{2}{N_p} \sum_{\nu} B^*_{\bq,\nu}g_{nn'\nu}(\bk',\bq)$$
+      ! H_{n\bk,n'\bk'} -> Hamil(ik, ibnd, ikq, jbnd)
+      ! B^*_{\bq,\nu} -> conj(Bmat(iq, inu))
+      ! g_{nn'\nu}(\bk',\bq) -> epf(ibnd, jbnd, inu, ikq, iq)
+      ! if q == 0, \delta_{n\bk,n'\bk'}\varepsilon_{n\bk} is diagonal matrix with \varepsilon_{n\bk}
+
+      ! G == (0,0,0) means this is the diagonal term with k=k'
+      ! ikq is the global index, i.e. the second index
+      ! ik is the local index, i.e. the first index
+      Hamil = czero
+      !print *, "hblocksize, nhblock_plrn, nkf * nbnd_plrn:", hblocksize, nhblock_plrn, nkf * nbnd_plrn
+
+
+
+      DO iq = 1, nqtotf
+         IF(model_vertex_plrn) THEN
+            epf = czero
+            epf(1, 1, nmodes, 1:nkf) = gq_model(iq)
+         ELSE
+            CALL start_clock('read_gmat') !nbndsub*nbndsub*nmodes*nkf
+            IF(io_lvl_plrn == 0) THEN
+               epf(:, :, :, :) = epfall(:, :, :, :, iq)
+            ELSE IF (io_lvl_plrn == 1) THEN
+               CALL get_buffer(epf, lword_g, iepfall, iq)
+            END IF
+            CALL stop_clock('read_gmat')
+         ENDIF
+         ! energy cutoff of g
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            DO ibnd = 1, nbnd_g_plrn
+               eig = etf_all(ibnd + g_start_band_plrn - 1, ik_global)
+               IF (eig < g_start_energy_plrn .OR. eig > g_end_energy_plrn) THEN
+                  !print *, "triggered"
+                   epf(ibnd, :, :, ik) = czero
+                   epf(:, ibnd, :, ik) = czero
+               END IF
+            END DO
+         END DO
+
+         DO ik = 1, nkf
+            ikq = ikq_all(ik, iq)
+
+            DO ibnd = 1, nbnd_plrn
+               indexkn1 = (ik - 1)*nbnd_plrn + ibnd
+
+               IF(nhblock_plrn == 1) THEN
+                  index_loc = indexkn1
+                  index_blk = 1
+               ELSE
+                  index_loc = MOD(indexkn1 - 1, hblocksize) + 1
+                  index_blk = INT((indexkn1 - 1) / hblocksize) + 1
+               END IF
+
+               IF(iq /= 1 .and. index_loc == 1 .and. nhblock_plrn /= 1) THEN
+                  CALL start_clock('read_Hmat')
+                  CALL get_buffer(Hamil, lword_h, ihamil, index_blk)
+                  CALL stop_clock('read_Hmat')
+               END IF
+
+               CALL start_clock('HdiagTerm')
+               IF (isGVec(xqf(1:3, iq))) THEN
+                  ! Note that, ik is local index while ikq is global index,
+                  ! so even when q=0, ik \= ikq, but ik_global == ikq
+                  ! delta_{nn' kk'} epsilon_{nk}
+                  ctemp = etf_all(select_bands_plrn(ibnd), ikq)
+                  indexkn2 = (ikq - 1)*nbnd_plrn + ibnd
+                  Hamil(indexkn2, index_loc) = Hamil(indexkn2, index_loc) + ctemp
+               END IF
+               CALL stop_clock('HdiagTerm')
+
+               CALL start_clock('HOffDiagTerm')
+               DO jbnd = 1, nbnd_plrn
+                  indexkn2 = (ikq - 1) * nbnd_plrn + jbnd
+                  DO inu = 1, nmodes
+                     ctemp = type_plrn * two/REAL(nqtotf,KIND=dp)*(Bmat(iq, inu))*&
+                        CONJG(epf(select_bands_plrn(jbnd) - g_start_band_plrn + 1, &
+                              select_bands_plrn(ibnd)- g_start_band_plrn + 1, inu, ik))
+                     Hamil(indexkn2, index_loc) = Hamil(indexkn2, index_loc) + ctemp
+                  END DO
+               END DO
+               CALL stop_clock('HOffDiagTerm')
+               IF(nhblock_plrn /= 1) THEN
+                  IF((index_loc == hblocksize .or. indexkn1 == nkf*nbnd_plrn)) THEN
+                     CALL start_clock('Write_Hmat')
+                     CALL save_buffer(Hamil, lword_h, ihamil, index_blk)
+                     Hamil = czero
+                     CALL stop_clock('Write_Hmat')
+                  END IF
+               END IF
+            END DO !ibnd
+         END DO !ik
+      END DO ! iq
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   ! Return true if xxk integer times of the reciprocal vector
+   ! if xxk is the difference of two vectors, then return true if these
+   ! two vector are the same
+   FUNCTION isGVec(xxk)
+      USE constants_epw, ONLY : eps6
+
+      IMPLICIT NONE
+      LOGICAL :: isGVec
+      REAL(dp), INTENT(IN) :: xxk(3)
+      isGVec = &
+         ABS(xxk(1) - NINT(xxk(1))) < eps6 .and. &
+         ABS(xxk(2) - NINT(xxk(2))) < eps6 .and. &
+         ABS(xxk(3) - NINT(xxk(3))) < eps6
+   END FUNCTION
+   !
+   !-----------------------------------------------------------------------
+   ! Return the global index of the local k point ik
+   FUNCTION ikqLocal2Global(ikq, nkqtotf)
+      USE division,       ONLY : fkbounds
+
+      IMPLICIT NONE
+      INTEGER, INTENT(IN) :: ikq, nkqtotf ! ik or iq
+      INTEGER :: ikqLocal2Global
+      INTEGER :: startn, lastn
+
+      CALL start_clock('ik_l2g')
+      CALL fkbounds(nkqtotf, startn, lastn)
+
+      ikqLocal2Global = startn + ikq - 1
+      IF (ikqLocal2Global > lastn) THEN
+         CALL errore('ikqLocal2Global', 'Index of k/q is beyond this pool.', 1)
+      END IF
+      CALL stop_clock('ik_l2g')
+
+   END FUNCTION
+   !
+   !-----------------------------------------------------------------------
+   ! Return the global index of the local k point ik
+   FUNCTION ikGlobal2Local(ik_g, nktotf)
+      USE division,       ONLY : fkbounds
+
+      IMPLICIT NONE
+      INTEGER, INTENT(IN) :: ik_g, nktotf ! ik or iq
+      INTEGER :: ikGlobal2Local
+      INTEGER :: startn, lastn
+
+      CALL fkbounds(nktotf, startn, lastn)
+
+      ikGlobal2Local = ik_g - startn + 1
+
+      IF(ikGlobal2Local <= 0) THEN
+         ikGlobal2Local = 0
+      END IF
+   END FUNCTION
+   !
+   !-----------------------------------------------------------------------
+   ! Return EXP(-(energy/sigma)**2)
+   SUBROUTINE cal_f_delta(energy, sigma, f_delta)
+      USE constants_epw, ONLY : ryd2mev, one, ryd2ev, two, zero
+
+      IMPLICIT NONE
+      REAL(dp), INTENT(IN) :: energy(:), sigma
+      REAL(dp), INTENT(OUT) :: f_delta(:)
+
+      f_delta = EXP(-(energy/sigma)**2)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   ! Calculate Hpsi with psi as input to use the diagon sovler in KS_solver cegterg
+   ! cegterg take two external subroutine to calculate Hpsi and Spsi to calculate
+   ! ( H - e S ) * evc = 0, since H and S is not saved due to their sizes
+   ! Hamil need to be passed to h_psi because the parameter space is fixed
+   ! to meet the requirement of Davidson diagonalization.
+   SUBROUTINE h_psi_plrn(lda, n, m, psi, hpsi)
+      USE elph2,         ONLY : nkf, nqtotf, wf, xqf, nktotf, etf
+      USE epwcom,        ONLY : model_vertex_plrn, time_rev_A_plrn, nbndsub
+      USE epwcom,        ONLY : mixing_Plrn, type_plrn, nhblock_plrn
+      USE epwcom,        ONLY : beta_plrn
+      USE constants_epw, ONLY : czero, one, two, zero, cone, eps2, ci
+      USE mp_world,      ONLY : mpime, world_comm
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE modes,         ONLY : nmodes
+      USE constants_epw, ONLY : czero
+      USE elph2,         ONLY : nkf, nktotf
+      USE io_global,     ONLY : stdout
+      !
+      IMPLICIT NONE
+      !
+      INTEGER, INTENT(IN) :: lda
+      !! leading dimension of arrays psi, spsi, hpsi, which is nkf * nbnd_plrn
+      INTEGER, INTENT(IN) :: n
+      !! true dimension of psi, spsi, hpsi
+      INTEGER, INTENT(IN) :: m
+      !! number of states psi
+      COMPLEX(DP), INTENT(INOUT) :: psi(lda,m)
+      !! the wavefunction
+      COMPLEX(DP), INTENT(OUT) :: hpsi(lda,m)
+
+      INTEGER :: iq, ik, ikq, ikpg, ik_global, ibnd, jbnd, iplrn, indexkn1, indexkn2, indexkn3
+      INTEGER :: start_mode, inu, iqpg, startn, lastn, index_loc, index_blk
+      INTEGER :: ibp, ibm, ikpb, ikmb, ivec
+      COMPLEX(DP) :: prefac, ctemp, ctemp2, hamil_kq
+      COMPLEX(DP), ALLOCATABLE :: hamiltonian(:), hpsi_global(:, :) ! One row of Hamiltonian
+
+      ! Gather psi (dimension nkf) to form eigVec (dimension nktotf)
+      CALL start_clock('cal_hpsi')
+      IF(lda < nkf * nbnd_plrn) CALL errore('h_psi_plrn', 'leading dimension of arrays psi is not correct', 1)
+      eigVec = czero
+      DO ik = 1, nkf
+         ik_global = ikqLocal2Global(ik, nktotf)
+         DO ibnd = 1, nbnd_plrn
+            indexkn1 = (ik-1)*nbnd_plrn + ibnd
+            indexkn2 = (ik_global-1)*nbnd_plrn + ibnd
+            eigVec(indexkn2, 1:m) = psi(indexkn1, 1:m)
+         END DO
+      END DO
+      CALL mp_sum(eigVec, inter_pool_comm)
+      !
+
+      ! Iterative diagonalization only get the lowest eigenvalues,
+      ! however, we will need the highest eigenvalues if we are calculating hole polaron
+      ! so, we multiply hpsi by -1, and get eigenvalues in diagonalization
+      ! and then multiply eigenvalues by -1.
+      hpsi(1:lda,1:m) = czero
+      DO ik = 1, nkf
+         DO ibnd = 1, nbnd_plrn
+            indexkn1 = (ik - 1) * nbnd_plrn + ibnd
+            IF(nhblock_plrn == 1) THEN
+               index_loc = indexkn1
+               index_blk = 1
+            ELSE
+               index_loc = MOD(indexkn1 - 1, hblocksize) + 1
+               index_blk = INT((indexkn1 - 1) / hblocksize) + 1
+            END IF
+            IF (index_loc == 1 .and. nhblock_plrn /= 1) CALL get_buffer(Hamil, lword_h, ihamil, index_blk)
+
+            DO ikq = 1, nktotf
+               DO jbnd = 1, nbnd_plrn
+                  indexkn2 = (ikq - 1)*nbnd_plrn + jbnd
+                  hpsi(indexkn1, 1:m) = hpsi(indexkn1, 1:m) - &
+                     type_plrn * Hamil(indexkn2, index_loc) * eigVec(indexkn2, 1:m)
+               END DO
+            END DO
+         END DO
+      END DO
+
+      CALL stop_clock('cal_hpsi')
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   SUBROUTINE s_psi_plrn(lda, n, m, psi, spsi)
+      IMPLICIT NONE
+
+      INTEGER,     INTENT(IN) :: lda, n, m
+      COMPLEX(DP), INTENT(IN) :: psi(lda,m), spsi(lda,m)
+
+      CALL errore('s_psi_plrn',"WARNING: This function should not be called at all!", 1)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   SUBROUTINE g_psi_plrn( lda, n, m, npol, psi, e )
+      !! This routine computes an estimate of the inverse Hamiltonian
+      !! and applies it to m wavefunctions.
+      IMPLICIT NONE
+
+      INTEGER     :: lda, n, m, npol
+      COMPLEX(DP) :: psi(lda, npol, m)
+      REAL(DP)    :: e(m)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   SUBROUTINE get_cfac(xk, nrr_k, ndegen_k, irvec_r, dims, cfac)
+      USE epwcom, ONLY : use_ws
+      USE constants_epw, ONLY : twopi, ci, czero
+      USE kinds,         ONLY : dp, i4b
+
+      IMPLICIT NONE
+
+
+      INTEGER, INTENT(IN):: nrr_k, dims
+      INTEGER, INTENT(IN):: ndegen_k(nrr_k, dims, dims)
+      REAL(KIND=dp), INTENT (IN) :: xk(3), irvec_r(3, nrr_k)
+      COMPLEX(KIND=dp), INTENT(OUT) :: cfac(nrr_k, dims, dims)
+      ! Local Variables
+      REAL(KIND=dp) :: rdotk(nrr_k)
+      INTEGER:: ikk, ikq, iw, iw2, ir
+
+      cfac = czero
+      rdotk = czero
+
+      CALL dgemv('t', 3, nrr_k, twopi, irvec_r, 3, xk, 1, 0.0_dp, rdotk, 1 )
+      !
+      IF (use_ws) THEN
+         DO iw=1, dims
+            DO iw2=1, dims
+               DO ir = 1, nrr_k
+                  IF (ndegen_k(ir,iw2,iw) > 0) THEN
+                     cfac(ir,iw2,iw)  = EXP( ci*rdotk(ir) ) / ndegen_k(ir,iw2,iw)
+                  ENDIF
+               ENDDO
+            ENDDO
+         ENDDO
+      ELSE
+         cfac(:,1,1)   = EXP( ci*rdotk(:) ) / ndegen_k(:,1,1)
+      ENDIF
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   ! B_{qu} = 1/N_p \sum_{mnk} A^*_{mk+q}A_{nk} [g_{mnu}(k,q)/\hbar\omega_{qu}]
+   FUNCTION cal_Bmat(iq, inu)
+      USE elph2,  ONLY : nkf, nktotf, wf, nqtotf
+      USE epwcom, ONLY : nstate_plrn, model_vertex_plrn
+      USE epwcom, ONLY : g_start_band_plrn
+      USE constants_epw, ONLY : czero, one, eps2, cone, eps8
+      USE mp_world,      ONLY : mpime, world_comm
+
+      IMPLICIT NONE
+
+      INTEGER, INTENT(IN) :: iq, inu
+      INTEGER :: ik, ikq, ik_global, ibnd, jbnd, iplrn, indexkn1, indexkn2
+      COMPLEX(DP) :: cal_Bmat
+      COMPLEX(DP) :: prefac
+
+      ! sum k = sum 1 to nkf + mp_sum (inter_pool)
+      ! mp_sum is in polaron_scf
+      cal_Bmat = czero
+      DO ik = 1, nkf
+         ikq = ikq_all(ik, iq)
+         ik_global = ikqLocal2Global(ik, nktotf)
+         ! TODO : what should do for iplrn?
+         DO iplrn = 1, 1
+            DO ibnd = 1, nbnd_plrn
+               DO jbnd = 1, nbnd_plrn
+                  indexkn1 = (ikq - 1)*nbnd_plrn + ibnd
+                  indexkn2 = (ik_global - 1) * nbnd_plrn + jbnd
+                  IF (wf(inu, iq) > eps8 ) THEN
+                     prefac = cone/(wf(inu, iq) * REAL(nqtotf, dp))
+                  ELSE
+                     prefac = czero
+                  END IF
+                  ! B_{q\nu} = \frac{1}{N_p}\sum_{nn'k}A^*_{n'k+q}\frac{g_{n'n\nu}(k, q)}{\hbar \omega_{q\nu}} A_{nk}
+                  cal_Bmat = cal_Bmat + prefac * (eigVec(indexkn2, iplrn)) * CONJG(eigVec(indexkn1, iplrn)) * &
+                     (epf(select_bands_plrn(ibnd) - g_start_band_plrn + 1, &
+                     select_bands_plrn(jbnd) - g_start_band_plrn + 1, inu, ik)) !conjg
+               END DO
+            END DO
+         END DO
+      END DO
+
+      !JLB - discard zero or imaginary frequency modes
+      IF (wf(inu, iq) < eps8) THEN
+         cal_Bmat = czero
+      END IF
+      !JLB
+
+   END FUNCTION
+   !
+   !-----------------------------------------------------------------------
+   ! Find the index of Gamma point i.e. (0, 0, 0) in xkf_all
+   ! which contains all the crystal coordinates of the k/q points
+   ! if Gamma point is not included, return 0
+   FUNCTION indexGamma(xkf_all)
+      USE elph2,        ONLY : nkf, nktotf
+      USE mp,           ONLY : mp_sum
+      USE mp_global,    ONLY : inter_pool_comm
+
+      IMPLICIT NONE
+
+      REAL(DP), INTENT(IN) :: xkf_all(:,:)
+      INTEGER :: indexGamma, ik, ik_global
+
+      indexGamma = 0
+
+      DO ik = 1, nkf
+         ik_global = ikqLocal2Global(ik, nktotf)
+         IF(isGVec(xkf_all(1:3, ik_global))) THEN
+            indexGamma = ik_global
+         END IF
+      END DO
+      CALL mp_sum(indexGamma, inter_pool_comm)
+
+      IF(.NOT. isGVec(xkf_all(1:3, indexGamma))) THEN
+         CALL errore('indexGamma','The index of Gamma point is wrong!', 1)
+      END IF
+
+   END FUNCTION
+   !
+   SUBROUTINE norm_plrn_wf(eigVec, norm_new)
+      USE elph2,  ONLY : nkf, nqtotf, wf, nktotf
+      USE epwcom, ONLY : nstate_plrn, time_rev_A_plrn
+      USE constants_epw, ONLY : czero, one, two, cone
+      USE mp,           ONLY : mp_sum
+      USE mp_global,    ONLY : inter_pool_comm
+
+      IMPLICIT NONE
+
+      COMPLEX(DP), INTENT(INOUT) :: eigVec(:, :)
+      REAL(DP), INTENT(IN) :: norm_new
+      REAL(DP) :: norm
+      INTEGER :: iplrn
+
+
+      DO iplrn = 1, nstate_plrn
+         norm = REAL(DOT_PRODUCT(eigVec(1:nbnd_plrn*nktotf, iplrn), eigVec(1:nbnd_plrn*nktotf, iplrn)))
+         eigVec(:, iplrn) = eigVec(:, iplrn)/DSQRT(norm) * SQRT(norm_new)
+      END DO
+
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   SUBROUTINE check_time_rev_sym(eigVec)
+      USE elph2,  ONLY : nkf, nqtotf, wf, nktotf
+      USE epwcom, ONLY : nstate_plrn, time_rev_A_plrn
+      USE constants_epw, ONLY : czero, one, two, cone
+      USE mp,           ONLY : mp_sum
+      USE mp_global,    ONLY : inter_pool_comm
+
+      IMPLICIT NONE
+
+      INTEGER :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg, indexkn1, indexkn2
+      INTEGER :: nPlrn_l
+      COMPLEX(DP) :: temp
+      COMPLEX(DP), INTENT(INOUT) :: eigVec(:, :)
+      COMPLEX(DP), ALLOCATABLE :: eigVec_save(:, :)
+      REAL(DP) :: norm
+
+
+      ! nstate_plrn
+      nPlrn_l = 1
+      ALLOCATE(eigVec_save(nktotf*nbnd_plrn, nPlrn_l))
+      eigVec_save = czero
+
+      DO ik = 1, nkf
+         ik_global = ikqLocal2Global(ik, nktotf)
+         ikpg = kpg_map(ik_global)
+         !if(is_mirror_k(ik)) then
+         DO ibnd = 1, nbnd_plrn
+            indexkn1 = (ikpg - 1)*nbnd_plrn + ibnd
+            indexkn2 = (ik_global - 1)*nbnd_plrn + ibnd
+            eigVec_save(indexkn1, 1:nPlrn_l)  = CONJG(eigVec(indexkn2, 1:nPlrn_l))
+         END DO
+         !end if
+      END DO
+      CALL mp_sum(eigVec_save, inter_pool_comm)
+      eigVec(:, 1:nPlrn_l) = (eigVec(:, 1:nPlrn_l) + eigVec_save(:, 1:nPlrn_l))
+
+      DO iplrn = 1, nPlrn_l
+         norm = REAL(DOT_PRODUCT(eigVec(1:nbnd_plrn*nktotf, iplrn), eigVec(1:nbnd_plrn*nktotf, iplrn)))!nktotf*nbnd_plrn*
+         eigVec(:, iplrn) = eigVec(:, iplrn)/DSQRT(norm)
+      END DO
+
+      DEALLOCATE(eigVec_save)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   SUBROUTINE diag_serial(estmteRt, eigVec)
+      USE constants_epw, ONLY : czero, twopi, ci, cone, zero
+      USE elph2,         ONLY : nkf, nqtotf, nktotf, xkf
+      USE elph2,         ONLY : etf, chw
+      USE epwcom,        ONLY : nstate_plrn, nkf1, nkf2, nkf3, type_plrn, nhblock_plrn
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+
+      IMPLICIT NONE
+
+      REAL(DP),    INTENT(OUT) :: estmteRt(:)
+      COMPLEX(DP), INTENT(OUT) :: eigVec(:, :)
+
+      REAL(DP)    :: rtemp, xxk(3), shift(3)
+      COMPLEX(DP) :: ctemp
+      INTEGER     :: iq, inu, ik, ikk, ikq, ik_global, iplrn, ikpg, icount
+      INTEGER     :: ibnd, jbnd, indexkn1, indexkn2
+      INTEGER     :: lwork, info, mm, index_loc, index_blk
+
+      INTEGER,      ALLOCATABLE :: iwork(:), ifail(:)
+      REAL(DP),     ALLOCATABLE :: rwork(:)
+      COMPLEX(DP),  ALLOCATABLE :: work(:)
+
+      COMPLEX(DP),  ALLOCATABLE :: Hamil_save(:,:)
+      COMPLEX(DP),  ALLOCATABLE :: Identity(:,:)
+
+
+      ALLOCATE(Hamil_save(nktotf*nbnd_plrn, nktotf*nbnd_plrn))
+      Hamil_save = czero
+      DO ik = 1, nkf
+         ik_global = ikqLocal2Global(ik, nktotf)
+         DO ibnd = 1, nbnd_plrn
+            indexkn1 = (ik-1)*nbnd_plrn + ibnd
+
+            index_loc = MOD(indexkn1 - 1, hblocksize) + 1
+            index_blk = INT((indexkn1 - 1)/hblocksize) + 1
+            IF (index_loc == 1 .and. nhblock_plrn /= 1) CALL get_buffer(Hamil, lword_h, ihamil, index_blk)
+
+            indexkn2 = (ik_global-1)*nbnd_plrn + ibnd
+            Hamil_save(indexkn2, 1:nktotf*nbnd_plrn) = - type_plrn * Hamil(1:nktotf*nbnd_plrn, index_loc)
+         END DO
+      END DO
+      CALL mp_sum(Hamil_save, inter_pool_comm)
+      IF (ionode) THEN
+         ALLOCATE(Identity(nktotf*nbnd_plrn, nktotf*nbnd_plrn))
+         lwork = 5*nktotf*nbnd_plrn
+         ALLOCATE( rwork( 7*nktotf*nbnd_plrn ) )
+         ALLOCATE( iwork( 5*nktotf*nbnd_plrn ) )
+         ALLOCATE( ifail( nktotf*nbnd_plrn ) )
+         ALLOCATE( work( lwork ) )
+         Identity = czero
+
+         DO ibnd = 1, nbnd_plrn*nktotf
+            Identity(ibnd, ibnd) = cone
+         END DO
+
+         eigVec = czero
+         estmteRt = zero
+
+         CALL ZHEGVX( 1, 'V', 'I', 'U', nktotf*nbnd_plrn, Hamil_save, nktotf*nbnd_plrn, Identity, nktotf*nbnd_plrn, &
+            zero, zero, 1, nstate_plrn, zero, mm, estmteRt(1:nstate_plrn), eigVec, nktotf*nbnd_plrn, &
+            work, lwork, rwork, iwork, ifail, info )
+
+         IF (info /= 0) CALL errore('diag_serial','Polaron: diagonal error.', 1)
+         DEALLOCATE(rwork, iwork, ifail, work, Identity)
+      END IF
+      DEALLOCATE(Hamil_save)
+      CALL mp_bcast( estmteRt, meta_ionode_id, world_comm )
+      CALL mp_bcast( eigVec, meta_ionode_id, world_comm )
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   !
+   SUBROUTINE diag_parallel(estmteRt, eigVec)
+      USE constants_epw, ONLY : czero, twopi, ci
+      USE constants_epw, ONLY : eps5, eps6, eps4, eps2, eps8, eps10
+      USE elph2,         ONLY : nkf, nqtotf, nktotf, xkf
+      USE elph2,         ONLY : etf, chw
+      USE epwcom,        ONLY : nstate_plrn, nkf1, nkf2, nkf3
+      USE epwcom,        ONLY : ethrdg_plrn
+      USE epwcom,        ONLY : adapt_ethrdg_plrn, init_ethrdg_plrn, nethrdg_plrn
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum, mp_bcast, mp_size, mp_max
+      USE mp_bands, ONLY : intra_bgrp_comm, inter_bgrp_comm, mp_start_bands
+      USE mp_bands_util, ONLY : intra_bgrp_comm_ => intra_bgrp_comm
+      USE mp_bands_util, ONLY : inter_bgrp_comm_ => inter_bgrp_comm
+
+      IMPLICIT NONE
+
+      REAL(DP),    INTENT(OUT) :: estmteRt(:)
+      COMPLEX(DP), INTENT(OUT) :: eigVec(:, :)
+
+      COMPLEX(DP), ALLOCATABLE :: Identity(:, :)
+      REAL(DP)    :: rtemp, xxk(3), shift(3)
+      REAL(DP)    :: ethrdg_init
+      COMPLEX(DP) :: ctemp
+      INTEGER     :: iq, inu, ik, ikk, ikq, ik_global, iplrn, ikpg, icount
+      INTEGER     :: ibnd, jbnd, itemp, jtemp
+      INTEGER     :: indexkn1, indexkn2
+      INTEGER :: ithr, nthr
+
+      INTEGER :: npw, npwx, dav_iter, notcnv, btype(nstate_plrn), nhpsi
+
+      INTEGER,      ALLOCATABLE :: iwork(:), ifail(:)
+      REAL(DP),     ALLOCATABLE :: rwork(:)
+      COMPLEX(DP),  ALLOCATABLE :: work(:)
+      COMPLEX(DP),  ALLOCATABLE :: psi(:, :)
+      REAL(DP)                  :: ethrdg
+
+
+
+      npw = nkf * nbnd_plrn
+
+      npwx = npw
+      CALL mp_max(npwx, inter_pool_comm)
+
+      ALLOCATE(psi(1:npwx, 1:nstate_plrn))
+
+      ! JLB: Option for adaptive threshold
+      IF (adapt_ethrdg_plrn) THEN
+         ethrdg_init = init_ethrdg_plrn
+         nthr = nethrdg_plrn
+         IF(ionode) THEN 
+            WRITE(stdout, "(a)") "     Adaptive threshold on iterative diagonalization activated:"
+            WRITE(stdout, "(a)") "     threshold, # iterations, eigenvalue(Ry)"
+         END IF 
+      ELSE
+         nthr = 1
+      END IF
+      !
+      DO ithr = 1, nthr
+
+         psi = czero
+         btype(1:nstate_plrn) = 1
+         !
+         IF (adapt_ethrdg_plrn) THEN
+            ethrdg = 10**(LOG10(ethrdg_init) + (ithr-1)*(LOG10(ethrdg_plrn) - LOG10(ethrdg_init))/(nthr-1))
+         ELSE
+            ethrdg = ethrdg_plrn
+         END IF
+         !
+         ! split eigVector (nqtotf) into parallel pieces psi (nkf), contains corresponding part with Hpsi
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            DO ibnd = 1, nbnd_plrn
+               indexkn1 = (ik-1)*nbnd_plrn + ibnd
+               indexkn2 = (ik_global-1)*nbnd_plrn + ibnd
+               psi(indexkn1, 1:nstate_plrn) = eigVec(indexkn2, 1:nstate_plrn)
+            END DO
+         END DO
+         ! inter_bgrp_comm should be some non-existing number,
+         ! to make the nodes in bgrp equal to 1
+         ! intra_bgrp_comm is parallel PW in pwscf
+         ! but here it should be parallel K.
+         ! Save them before change them
+         itemp = intra_bgrp_comm_
+         jtemp = inter_bgrp_comm_
+         !call mp_start_bands(1, world_comm)
+         !
+         intra_bgrp_comm_ = inter_pool_comm
+         inter_bgrp_comm_ = inter_bgrp_comm
+         !
+         !write(stdout, *) "test communicator", inter_pool_comm, inter_bgrp_comm, intra_bgrp_comm
+         !write(stdout, *) mp_size(inter_pool_comm)
+         !write(stdout, *) "past test inter_pool communicator"
+         !write(stdout, *) mp_size(inter_bgrp_comm)
+         !write(stdout, *) "past test inter_bgrp communicator"
+         !write(stdout, *) mp_size(intra_bgrp_comm)
+         !write(stdout, *) "past test intra_bgrp communicator" 4*
+         CALL start_clock('cegterg_prln')
+         CALL cegterg( h_psi_plrn, s_psi_plrn, .false., g_psi_plrn, &
+            npw, npwx, nstate_plrn, nstate_plrn*10, 1, psi, ethrdg, &
+            estmteRt, btype, notcnv, .false., dav_iter, nhpsi)
+         CALL start_clock('cegterg_prln')
+         IF(adapt_ethrdg_plrn .AND. ionode) WRITE(stdout, "(a, E14.6, I6, E14.6)") "   ", ethrdg, dav_iter, estmteRt
+         IF(notcnv>0 .AND. ionode) WRITE(stdout, "(a)") "   WARNING: Some eigenvalues not converged, & 
+         check initialization, ethrdg_plrn or try adapt_ethrdg_plrn"
+         !
+         intra_bgrp_comm_ = itemp
+         inter_bgrp_comm_ = jtemp
+         !
+         !CALL gatherVector()
+         eigVec = czero
+         DO ik = 1, nkf
+            ik_global = ikqLocal2Global(ik, nktotf)
+            DO ibnd = 1, nbnd_plrn
+               indexkn1 = (ik-1)*nbnd_plrn + ibnd
+               indexkn2 = (ik_global-1)*nbnd_plrn + ibnd
+               eigVec(indexkn2, 1:nstate_plrn) = psi(indexkn1, 1:nstate_plrn)
+            END DO
+         END DO
+         CALL mp_sum(eigVec, inter_pool_comm)
+
+      END DO
+      !
+      DEALLOCATE(psi)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   ! Write ionic positions and displacements
+   SUBROUTINE write_plrn_dtau_xsf(dtau, nqf1, nqf2, nqf3, filename, species)
+      USE constants_epw, ONLY : czero, ryd2ev, ryd2mev, zero, bohr2ang
+      !USE elph2,         ONLY : nkf, nqtotf
+      USE epwcom,        ONLY : nstate_plrn!, nqf1, nqf2, nqf3
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE modes,         ONLY : nmodes
+      USE ions_base,     ONLY : nat, amass, ityp, tau, atm, nsp, na, ntypx
+      USE cell_base,     ONLY : at, alat
+
+      IMPLICIT NONE
+
+      COMPLEX(DP), INTENT(IN) :: dtau(:, :)
+      INTEGER, INTENT(IN) :: nqf1, nqf2, nqf3
+      CHARACTER(LEN=*), INTENT(IN) :: filename
+      INTEGER, INTENT(IN), OPTIONAL :: species(50)
+
+      REAL(DP)    :: rtemp, cell(3, 3), shift(1:3)
+      INTEGER     :: wan_func_file, indexkn1, nbnd_out, nat_all, nptotf, nqf_s(1:3)
+      INTEGER     :: ix, iy, iz, iRp_local, iRp, iatm, idir, iatm_all, iatm_sp, ika
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg, isp, Rp_vec(1:3)
+
+      INTEGER,  ALLOCATABLE   :: elements(:)
+      REAL(DP), ALLOCATABLE   :: atoms(:,:), displacements(:,:)
+      !CHARACTER(LEN=3), ALLOCATABLE  :: elements( : )
+
+
+      ! total number of atoms is
+      ! (number of atoms in unit cell) x (number of cells in the supercell)
+      nptotf =  nqf1 * nqf2 * nqf3
+      nqf_s  = (/nqf1, nqf2, nqf3/)
+      nat_all = nat * nptotf
+
+      ALLOCATE(atoms(3, nat_all))
+      ALLOCATE(elements(nat_all))
+      ALLOCATE(displacements(3, nat_all))
+
+      atoms = zero
+      elements = 0
+      displacements = zero
+
+      cell(1:3, 1) = at(1:3, 1) * nqf1
+      cell(1:3, 2) = at(1:3, 2) * nqf2
+      cell(1:3, 3) = at(1:3, 3) * nqf3
+
+
+      !print *, "nsp, ityp(1) ", nsp, ityp(1) ! nsp = 0, cannot use it!
+      !print *, "at 1:", at(1:3, 1)
+      !print *, "at 2:", at(1:3, 2)
+      !print *, "at 3:", at(1:3, 3)
+      iatm_all = 0
+      DO isp = 1, ntypx
+         DO iRp = 1, nptotf
+            Rp_vec(1:3) = index_Rp(iRp, nqf_s)
+            DO iatm = 1, nat
+               IF(ityp(iatm) == isp) THEN
+                  iatm_all = iatm_all + 1
+                  ika = (iatm - 1) * 3 + 1
+                  !Rp(1:3) = (ix - nqf1/2) * at(1:3, 1) + (iy - nqf2/2) * at(1:3, 2) + (iz - nqf3/2) * at(1:3, 3)
+                  shift(1:3) = Rp_vec(1) * at(1:3, 1) + Rp_vec(2) * at(1:3, 2) + Rp_vec(3) * at(1:3, 3)
+                  IF(PRESENT(species)) THEN
+                     elements(iatm_all) = species(ityp(iatm))
+                  ELSE
+                     elements(iatm_all) = ityp(iatm)
+                  END IF
+                  atoms(1:3, iatm_all) = tau(1:3, iatm) + shift(1:3)
+                  displacements(1:3, iatm_all) = REAL(dtau(iRp, ika:ika+2))
+               END IF
+            END DO
+         END DO
+      END DO
+
+
+      !displacements = zero
+
+      cell = cell * alat
+      atoms = atoms * alat
+
+      CALL write_xsf_file(filename, cell*bohr2ang, elements, atoms*bohr2ang, displacements*bohr2ang)
+
+      DEALLOCATE(atoms, elements, displacements)
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   ! JLB: Write ionic positions and displacements for transformed supercell
+   SUBROUTINE scell_write_plrn_dtau_xsf(dtau, nqtotf_p, nRp_p, Rp_p, as_p, filename, species)
+      USE constants_epw, ONLY : czero, ryd2ev, ryd2mev, zero, bohr2ang
+      !USE elph2,         ONLY : nkf, nqtotf
+      USE epwcom,        ONLY : nstate_plrn!, nqf1, nqf2, nqf3
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE modes,         ONLY : nmodes
+      USE ions_base,     ONLY : nat, amass, ityp, tau, atm, nsp, na, ntypx
+      USE cell_base,     ONLY : at, alat
+
+      IMPLICIT NONE
+
+      COMPLEX(DP), INTENT(IN) :: dtau(:, :)
+      INTEGER, INTENT(IN) :: nqtotf_p, nRp_p, Rp_p(:,:)
+      REAL(DP), INTENT(IN) :: as_p(3,3)
+      CHARACTER(LEN=*), INTENT(IN) :: filename
+      INTEGER, INTENT(IN), OPTIONAL :: species(50)
+
+      REAL(DP)    :: rtemp, cell(3, 3), shift(1:3)
+      INTEGER     :: wan_func_file, indexkn1, nbnd_out, nat_all, nqf_s(1:3)
+      INTEGER     :: ix, iy, iz, iRp_local, iRp, iatm, idir, iatm_all, iatm_sp, ika
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg, isp, Rp_vec(1:3)
+
+      INTEGER,  ALLOCATABLE   :: elements(:)
+      REAL(DP), ALLOCATABLE   :: atoms(:,:), displacements(:,:)
+      !CHARACTER(LEN=3), ALLOCATABLE  :: elements( : )
+
+
+      ! total number of atoms is
+      ! (number of atoms in unit cell) x (number of cells in the supercell)
+      nat_all = nat * nRp_p
+
+      ALLOCATE(atoms(3, nat_all))
+      ALLOCATE(elements(nat_all))
+      ALLOCATE(displacements(3, nat_all))
+
+      atoms = zero
+      elements = 0
+      displacements = zero
+
+      cell(1:3, 1) = as_p(1, 1:3)
+      cell(1:3, 2) = as_p(2, 1:3)
+      cell(1:3, 3) = as_p(3, 1:3)
+
+      !print *, "nsp, ityp(1) ", nsp, ityp(1) ! nsp = 0, cannot use it!
+      !print *, "at 1:", at(1:3, 1)
+      !print *, "at 2:", at(1:3, 2)
+      !print *, "at 3:", at(1:3, 3)
+      iatm_all = 0
+      DO isp = 1, ntypx
+         DO iRp = 1, nRp_p
+            Rp_vec(1:3) = Rp_p(1:3, iRp)
+            DO iatm = 1, nat
+               IF(ityp(iatm) == isp) THEN
+                  iatm_all = iatm_all + 1
+                  ika = (iatm - 1) * 3 + 1
+                  !Rp(1:3) = (ix - nqf1/2) * at(1:3, 1) + (iy - nqf2/2) * at(1:3, 2) + (iz - nqf3/2) * at(1:3, 3)
+                  shift(1:3) = Rp_vec(1) * at(1:3, 1) + Rp_vec(2) * at(1:3, 2) + Rp_vec(3) * at(1:3, 3)
+                  IF(PRESENT(species)) THEN
+                     elements(iatm_all) = species(ityp(iatm))
+                  ELSE
+                     elements(iatm_all) = ityp(iatm)
+                  END IF
+                  atoms(1:3, iatm_all) = tau(1:3, iatm) + shift(1:3)
+                  displacements(1:3, iatm_all) = REAL(dtau(iRp, ika:ika+2))
+               END IF
+            END DO
+         END DO
+      END DO
+
+
+      !displacements = zero
+
+      cell = cell * alat
+      atoms = atoms * alat
+
+      CALL write_xsf_file(filename, cell*bohr2ang, elements, atoms*bohr2ang, displacements*bohr2ang)
+
+      DEALLOCATE(atoms, elements, displacements)
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   SUBROUTINE write_xsf_file(filename, cell, elements, atoms, forces, data_cube)
+
+      IMPLICIT NONE
+
+      INTEGER, INTENT(IN)              :: elements(:)
+      REAL(dp), INTENT(IN)             :: cell(3, 3), atoms(:, :)
+      REAL(dp), INTENT(IN) , OPTIONAL  :: forces(:, :), data_cube(:, :, :)
+      CHARACTER(LEN=*), INTENT(IN):: filename
+
+      REAL(DP)    :: rtemp
+      INTEGER     :: file_unit, indexkn1, nbnd_out
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg
+      INTEGER     :: ix, iy, iz, nx, ny, nz, iatm, natm, shapeTemp(3)
+
+      file_unit = 602
+      natm = UBOUND(elements, DIM=1)
+
+      OPEN (UNIT=file_unit, FILE=TRIM(filename), FORM='formatted', STATUS='unknown')
+
+      WRITE (file_unit, *) '#'
+      WRITE (file_unit, *) '# Generated by the EPW polaron code'
+      WRITE (file_unit, *) '#'
+      WRITE (file_unit, *) '#'
+      WRITE (file_unit, *) 'CRYSTAL'
+      WRITE (file_unit, *) 'PRIMVEC'
+      WRITE (file_unit, '(3f12.7)') cell(1:3, 1)
+      WRITE (file_unit, '(3f12.7)') cell(1:3, 2)
+      WRITE (file_unit, '(3f12.7)') cell(1:3, 3)
+
+      WRITE (file_unit, *) 'PRIMCOORD'
+      ! The second number is always 1 for PRIMCOORD coordinates,
+      ! according to http://www.xcrysden.org/doc/XSF.html
+      WRITE (file_unit, *)  natm, 1
+
+      DO iatm = 1, natm
+         IF(PRESENT(forces)) THEN
+            WRITE(file_unit,'(I3, 3x, 3f15.9, 3x, 3f15.9)') elements(iatm), atoms(1:3, iatm), forces(1:3, iatm)
+         ELSE
+            WRITE(file_unit,'(I3, 3x, 3f15.9)') elements(iatm), atoms(1:3, iatm)
+         END IF
+      END DO
+
+      IF(PRESENT(data_cube)) THEN
+         shapeTemp = SHAPE(data_cube)
+         WRITE (file_unit, '(/)')
+         WRITE (file_unit, '("BEGIN_BLOCK_DATAGRID_3D",/,"3D_field",/, "BEGIN_DATAGRID_3D_UNKNOWN")')
+         WRITE (file_unit, '(3i6)') SHAPE(data_cube)
+         WRITE (file_unit, '(3f12.6)') 0.0, 0.0, 0.0
+         WRITE (file_unit, '(3f12.7)') cell(1:3, 1)
+         WRITE (file_unit, '(3f12.7)') cell(1:3, 2)
+         WRITE (file_unit, '(3f12.7)') cell(1:3, 3)
+         ! TODO: data cube is probably to large to take in the same way of lattice information
+         ! May be usefull and implemented in the furture
+         WRITE (file_unit, *) (((data_cube(ix, iy, iz), ix = 1, shapeTemp(1)), &
+            iy = 1, shapeTemp(2)), iz = 1, shapeTemp(3))
+         WRITE (file_unit, '("END_DATAGRID_3D",/, "END_BLOCK_DATAGRID_3D")')
+      END IF
+      CLOSE (file_unit)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   SUBROUTINE write_plrn_wf(eigvec_wan, filename, etf_all)
+      USE constants_epw, ONLY : czero, ryd2ev, ryd2mev
+      USE elph2,         ONLY : nkf, nqtotf, nktotf
+      USE epwcom,        ONLY : nstate_plrn, nkf1, nkf2, nkf3
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE epwcom,        ONLY : nbndsub, scell_mat_plrn
+
+      IMPLICIT NONE
+      COMPLEX(DP), INTENT(IN) :: eigvec_wan(:, :)
+      REAL(DP), INTENT(IN), OPTIONAL :: etf_all(:, :)
+      CHARACTER(LEN=*), INTENT(IN) :: filename
+
+      REAL(DP)    :: rtemp
+      INTEGER     :: wan_func_file, indexkn1, nbnd_out
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg
+
+      IF(PRESENT(etf_all)) THEN
+         nbnd_out = nbnd_plrn
+      ELSE
+         nbnd_out = nbndsub
+      END IF
+      wan_func_file = 602
+
+      OPEN(UNIT = wan_func_file, FILE = TRIM(filename))
+      IF (scell_mat_plrn) THEN
+         WRITE(wan_func_file, '(a, 3I10)') 'Scell', nktotf, nbndsub, nstate_plrn
+      ELSE
+         WRITE(wan_func_file, '(6I10)') nkf1, nkf2, nkf3, nktotf, nbndsub, nstate_plrn
+      END IF
+
+      DO ik = 1, nktotf
+         DO ibnd = 1, nbnd_out
+            DO iplrn = 1, nstate_plrn
+               indexkn1 = (ik-1)*nbnd_out + ibnd
+               IF(PRESENT(etf_all)) THEN
+                  WRITE(wan_func_file, '(2I5, 4f15.7)') ik, ibnd, etf_all(select_bands_plrn(ibnd), ik)*ryd2ev, &
+                     eigvec_wan(indexkn1, iplrn), ABS(eigvec_wan(indexkn1, iplrn))
+               ELSE
+                  WRITE(wan_func_file, '(2f15.7)') eigvec_wan(indexkn1, iplrn)
+               END IF
+            END DO
+         END DO
+      END DO
+      CLOSE(wan_func_file)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   SUBROUTINE read_plrn_wf(eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p, filename, scell, etf_all)
+      USE constants_epw, ONLY : czero
+      USE elph2,         ONLY : nkf, nqtotf, nktotf
+      USE epwcom,        ONLY : nstate_plrn, nkf1, nkf2, nkf3
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+
+      IMPLICIT NONE
+      COMPLEX(DP), ALLOCATABLE, INTENT(INOUT) :: eigvec_wan(:, :)
+      CHARACTER(LEN=*), INTENT(IN) :: filename
+      LOGICAL, INTENT(IN), OPTIONAL  :: scell
+      REAL(DP), INTENT(IN), OPTIONAL :: etf_all(:, :)
+      INTEGER, INTENT(OUT) :: nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p
+
+      REAL(DP)    :: rtemp
+      INTEGER     :: wan_func_file, nPlrn_p, indexkn1
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg
+      !JLB (dummy variables read from file)
+      INTEGER     :: i1, i2
+      REAL(DP)    :: r1
+      CHARACTER(LEN=5) :: dmmy
+
+      IF(ALLOCATED(eigvec_wan)) DEALLOCATE(eigvec_wan)
+
+      IF(ionode) THEN
+         wan_func_file = 602
+         OPEN(UNIT = wan_func_file, FILE = TRIM(filename))
+
+         IF (PRESENT(scell) .AND. scell) THEN
+            READ(wan_func_file, '(a, 3I10)') dmmy, nktotf_p, nbndsub_p, nPlrn_p
+            ! nkf1_p, nkf2_p, nkf3_p should never be called if scell=.true.
+            ! Just assigning an arbitrary value
+            nkf1_p = 0
+            nkf2_p = 0
+            nkf3_p = 0
+         ELSE
+            READ(wan_func_file, '(6I10)') nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p, nPlrn_p
+            IF(nkf1_p*nkf2_p*nkf3_p /= nktotf_p) THEN
+               CALL errore("read_plrn_wf",filename//'Not generated from the uniform grid!', 1)
+            END IF
+         END IF
+
+         ALLOCATE(eigvec_wan(nbndsub_p * nktotf_p, nPlrn_p))
+
+         eigvec_wan = czero
+         DO ik = 1, nktotf_p
+            DO ibnd = 1, nbndsub_p
+               DO iplrn = 1, nPlrn_p
+                  indexkn1 = (ik-1)*nbndsub_p + ibnd
+                  !JLB
+                  IF(PRESENT(etf_all)) THEN
+                     READ(wan_func_file, '(2I5, 3f15.7)') i1, i2, r1, eigvec_wan(indexkn1, iplrn)
+                  ELSE
+                     READ(wan_func_file, '(2f15.7)') eigvec_wan(indexkn1, iplrn)
+                  END IF
+                  !JLB
+                  !READ(wan_func_file, '(2f15.7)') eigvec_wan(indexkn1, iplrn)
+               END DO
+            END DO
+         END DO
+         CLOSE(wan_func_file)
+      END IF
+      CALL mp_bcast (nkf1_p,  meta_ionode_id, world_comm)
+      CALL mp_bcast (nkf2_p,  meta_ionode_id, world_comm)
+      CALL mp_bcast (nkf3_p,  meta_ionode_id, world_comm)
+      CALL mp_bcast (nktotf_p,meta_ionode_id, world_comm)
+      CALL mp_bcast (nPlrn_p, meta_ionode_id, world_comm)
+      CALL mp_bcast (nbndsub_p, meta_ionode_id, world_comm)
+      IF(.NOT. ALLOCATED(eigvec_wan)) THEN
+         ALLOCATE(eigvec_wan(nbndsub_p * nktotf_p, nPlrn_p))
+         eigvec_wan = czero
+      END IF
+      CALL mp_bcast (eigvec_wan, meta_ionode_id, world_comm)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   ! Write Bmat and phonon frequency to filename
+   SUBROUTINE write_plrn_bmat(eigvec_wan, filename, etf_all)
+      USE constants_epw, ONLY : czero, ryd2ev, ryd2mev
+      USE elph2,         ONLY : nkf, nqtotf
+      USE epwcom,        ONLY : nstate_plrn, nqf1, nqf2, nqf3
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE modes,         ONLY : nmodes
+      !JLB
+      USE ions_base,     ONLY : amass, ityp
+      USE epwcom,        ONLY : scell_mat_plrn
+
+
+      IMPLICIT NONE
+      COMPLEX(DP), INTENT(IN) :: eigvec_wan(:, :)
+      REAL(DP), INTENT(IN), OPTIONAL :: etf_all(:, :)
+      CHARACTER(LEN=*), INTENT(IN) :: filename
+
+      REAL(DP)    :: rtemp
+      INTEGER     :: wan_func_file, indexkn1, nbnd_out
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg
+      !!JLB
+      !INTEGER     :: ina, ialpha
+      !REAL(dp)    :: cm(3) ! change of center of mass
+
+      !IF(ionode) THEN
+
+      IF(PRESENT(etf_all)) THEN
+         nbnd_out = nmodes
+      ELSE
+         nbnd_out = nmodes
+      END IF
+      wan_func_file = 602
+
+      OPEN(UNIT = wan_func_file, FILE = TRIM(filename))
+      IF (scell_mat_plrn) THEN
+         WRITE(wan_func_file, '(a, 2I10)') 'Scell', nqtotf, nmodes
+      ELSE
+         WRITE(wan_func_file, '(5I10)') nqf1, nqf2, nqf3, nqtotf, nmodes
+      END IF
+
+      !cm=0.d0
+      !ialpha=0
+      DO ik = 1, nqtotf
+         DO ibnd = 1, nmodes ! p
+            IF(PRESENT(etf_all)) THEN ! \kappa, \alpha
+               !WRITE(wan_func_file, '(2I5, 4f15.7)') ik, ibnd, etf_all(ibnd, ik)*ryd2mev, &
+               !   eigvec_wan(ik, ibnd), ABS(eigvec_wan(ik, ibnd))
+               !!JLB: Changed format for improved accuracy
+               WRITE(wan_func_file, '(2I5, 4ES18.10)') ik, ibnd, etf_all(ibnd, ik)*ryd2mev, &
+                  eigvec_wan(ik, ibnd), ABS(eigvec_wan(ik, ibnd))
+            ELSE
+               !!JLB
+               !ina = (ibnd - 1) / 3 + 1
+               !ialpha = ialpha+1
+               !cm(ialpha) = cm(ialpha) + amass(ityp(ina))*REAL(eigvec_wan(ik, ibnd),dp)
+               !IF (ialpha==3) ialpha=0
+               !!JLB
+               !JLB: Changed format for improved accuracy
+               WRITE(wan_func_file, '(2ES18.10)') eigvec_wan(ik, ibnd)
+            END IF
+         END DO
+      END DO
+      CLOSE(wan_func_file)
+      !END IF
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   ! Read dtau from filename
+   SUBROUTINE read_plrn_dtau(eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nktotf_p, nmodes_p, filename, scell, etf_all)
+      USE constants_epw, ONLY : czero
+      USE elph2,         ONLY : nkf, nqtotf, nktotf
+      USE epwcom,        ONLY : nstate_plrn, nkf1, nkf2, nkf3
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp_world,      ONLY : world_comm
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE modes,         ONLY : nmodes
+
+      IMPLICIT NONE
+      COMPLEX(DP), ALLOCATABLE, INTENT(INOUT) :: eigvec_wan(:, :)
+      CHARACTER(LEN=*), INTENT(IN) :: filename
+      LOGICAL, INTENT(IN), OPTIONAL :: scell
+      REAL(DP), INTENT(IN), OPTIONAL :: etf_all(:, :) !JLB
+      INTEGER, INTENT(OUT) :: nkf1_p, nkf2_p, nkf3_p, nktotf_p, nmodes_p
+
+      REAL(DP)    :: rtemp
+      INTEGER     :: wan_func_file, nPlrn_p
+      INTEGER     :: iq, inu, ik, ikq, ik_global, ibnd, iplrn, ikpg, iatm, icount
+      !JLB (dummy variables read from file)
+      INTEGER     :: i1, i2
+      REAL(DP)    :: r1
+      CHARACTER(LEN=5) :: dmmy
+
+      IF(ALLOCATED(eigvec_wan)) DEALLOCATE(eigvec_wan)
+
+      IF(ionode) THEN
+         wan_func_file = 602
+         OPEN(UNIT = wan_func_file, FILE = TRIM(filename))
+
+         IF (PRESENT(scell) .AND. scell) THEN
+            READ(wan_func_file, '(a, 2I10)') dmmy, nktotf_p, nmodes_p
+            ! nkf1_p, nkf2_p, nkf3_p should never be called if scell=.true.
+            ! Just assigning an arbitrary value
+            nkf1_p = 0
+            nkf2_p = 0
+            nkf3_p = 0
+         ELSE
+            READ(wan_func_file, '(5I10)') nkf1_p, nkf2_p, nkf3_p, nktotf_p, nmodes_p
+            IF(nkf1_p*nkf2_p*nkf3_p /= nktotf_p) THEN
+               CALL errore('read_plrn_dtau', filename//'Not generated from the uniform grid!', 1)
+            END IF
+         END IF
+
+         IF(nmodes /= nmodes_p) THEN
+            CALL errore('read_plrn_dtau', "Number of phonon modes are different with last run", 1)
+         END IF
+
+         ALLOCATE(eigvec_wan(nktotf_p, nmodes_p))
+
+         eigvec_wan = czero
+         DO icount = 1, nktotf_p
+            DO iatm = 1, nmodes_p
+               IF(PRESENT(etf_all)) THEN
+                  !READ(wan_func_file, '(2I5, 3f15.7)') i1, i2, r1, eigvec_wan(icount, iatm)
+                  !JLB: Changed format for improved accuracy
+                  READ(wan_func_file, '(2I5, 3ES18.10)') i1, i2, r1, eigvec_wan(icount, iatm)
+               ELSE
+                  !READ(wan_func_file, '(2f15.7)') eigvec_wan(icount, iatm)
+                  !JLB: Changed format for improved accuracy
+                  READ(wan_func_file, '(2ES18.10)') eigvec_wan(icount, iatm)
+               END IF
+               !READ(wan_func_file, '(2f15.7)') eigvec_wan(icount, iatm)
+            END DO
+         END DO
+         CLOSE(wan_func_file)
+      END IF
+      CALL mp_bcast (nkf1_p,  meta_ionode_id, world_comm)
+      CALL mp_bcast (nkf2_p,  meta_ionode_id, world_comm)
+      CALL mp_bcast (nkf3_p,  meta_ionode_id, world_comm)
+      CALL mp_bcast (nktotf_p,meta_ionode_id, world_comm)
+      CALL mp_bcast (nmodes_p, meta_ionode_id, world_comm)
+      IF(.NOT. ALLOCATED(eigvec_wan)) THEN
+         ALLOCATE(eigvec_wan(nktotf_p, nmodes_p))
+         eigvec_wan = czero
+      END IF
+      CALL mp_bcast (eigvec_wan, meta_ionode_id, world_comm)
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   !! Fourier transform from eigVecIn to eigVecOut
+   !! ttype is 'Bloch2Wan' or 'Wan2Bloch'
+   !! Parallel version, each pool calculates its own k point set (nkf),
+   !! then the mp_sum is used to sum over different pools.
+   !! require the correct initialization of Rp_array
+   SUBROUTINE plrn_eigvec_tran(ttype, t_rev, eigVecIn, nkf1_p, nkf2_p, nkf3_p, nbndsub_p, &
+         nrr_k, ndegen_k, irvec_r, dims, eigVecOut, ip_center)
+      USE constants_epw, ONLY : czero, twopi, ci, cone, two
+      USE elph2,         ONLY : nkf, xkf, etf, chw, nktotf
+      USE epwcom,        ONLY : nstate_plrn, nbndsub, time_rev_A_plrn
+      USE epwcom,        ONLY : nkf1, nkf2, nkf3
+      USE wan2bloch,     ONLY : hamwan2bloch !!=> hamwan2bloch_old
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE mp_world,      ONLY : mpime
+      USE io_global,     ONLY : stdout
+
+      IMPLICIT NONE
+
+      COMPLEX(DP), INTENT(IN) :: eigvecIn(:, :)
+      LOGICAL,     INTENT(IN) :: t_rev
+      INTEGER,     INTENT(IN) :: nkf1_p, nkf2_p, nkf3_p, nbndsub_p
+      INTEGER,     INTENT(IN) :: nrr_k, dims, ndegen_k(:,:,:) ! ! Added for polaron calculations by Chao Lian.
+      REAL(DP),    INTENT(IN) :: irvec_r(3, nrr_k)
+      CHARACTER(LEN=9), INTENT(IN) :: ttype
+      INTEGER, INTENT(IN), OPTIONAL :: ip_center(1:3)
+      !JLB
+      !LOGICAL, OPTIONAL, INTENT(IN) :: readpol
+
+      COMPLEX(DP), INTENT(OUT) :: eigVecOut(:, :)
+      COMPLEX(KIND=dp) :: expTable(3)
+      INTEGER :: idir
+
+      REAL(DP)    :: rtemp, xxk(3), shift(3), etf_tmp(nbndsub)
+      REAL(DP) :: phi, maxreal !JLB
+      COMPLEX(DP) :: ctemp, cufkk(nbndsub, nbndsub), cfac(nrr_k, dims, dims), cufkk_k(nbndsub, nbndsub, nktotf), phase !JLB
+      COMPLEX(DP), ALLOCATABLE :: cufkkg ( :, :, :)
+      INTEGER     :: itype, iq, inu, ik, ikk, ikq, ik_global, iplrn, ikpg, icount
+      INTEGER     :: ibnd, jbnd, ix, iy, iz, indexkn1, indexkn2, i_vec(3), center_shift(1:3), nkf_p(3)
+      LOGICAL     :: is_mirror
+
+      nkf_p(1:3) = (/nkf1_p, nkf2_p, nkf3_p/)
+      IF(nbndsub_p /= nbndsub) CALL errore('plrnwfwan2bloch','Different bands included in last calculation!', 1)
+      IF(ttype == 'Bloch2Wan') THEN
+         itype =  1
+      ELSE IF (ttype == 'Wan2Bloch') THEN
+         itype = -1
+      ELSE
+         CALL errore('plrn_eigvec_tran', 'Illegal translate form; should be Bloch2Wan or Wan2Bloch!', 1)
+      END IF
+
+      IF(PRESENT(ip_center)) THEN
+         center_shift(1:3) = nkf_p/2 - ip_center
+      ELSE
+         center_shift(1:3) = 0
+      END IF
+      !! itype =  1 : Bloch2Wan: A_{mp} =  \frac{1}{N_p} \sum_{nk}A_{nk} \exp\left(ik\cdot R_p\right)U^\dagger_{mnk}
+      !! itype = -1 : Wan2Bloch: A_{nk} = \sum_{mp}A_{mp}\exp(-ik\cdot R_p) U_{mnk}
+      !! ibnd -> m, jbnd -> n
+      !! R_p from 1 to nkf1/2/3_p, note that loop in the sequence of ix, iy, and iz,
+      !! This sequence need to be consistent every time transpose between eigvec_wann and eigvec
+      eigVecOut = czero
+      DO ik = 1, nkf
+         xxk = xkf(1:3, 2 * ik - 1)
+         expTable(1:3) = EXP( twopi * ci * xxk(1:3) )
+         ik_global = ikqLocal2Global(ik, nktotf)
+         is_mirror = (t_rev .AND. (ik_global > ikpg))
+
+         CALL get_cfac(xxk, nrr_k, ndegen_k, irvec_r, dims, cfac)
+
+         CALL hamwan2bloch ( nbndsub, nrr_k, cufkk(1:nbndsub, 1:nbndsub), &
+            etf_tmp, chw, cfac, dims, is_mirror)
+
+         IF(itype == 1) cufkk(1:nbndsub, 1:nbndsub) = CONJG(TRANSPOSE(cufkk(1:nbndsub, 1:nbndsub)))
+         DO iplrn = 1, nstate_plrn
+            !icount = 0
+            !! loop over all Wannier position p
+            IF (nkf1_p == 0 .or. nkf2_p == 0 .or. nkf3_p == 0) THEN
+               CALL errore('plrn_eigvec_tran','Wrong k grid, use nkf1/2/3 to give k grid!', 1)
+            END IF
+            DO icount = 1, nkf1_p * nkf2_p * nkf3_p
+               i_vec(1:3) = MODULO(index_Rp(icount, nkf_p) + center_shift, nkf_p)
+               ! Same as EXP(twopi * ci * DOT_PRODUCT((/ix, iy, iz/), xxk)), to save time
+               ctemp = PRODUCT(expTable(1:3)**i_vec(1:3))
+               DO ibnd = 1, nbndsub_p ! loop over all Wannier state m
+                  DO jbnd = 1, nbnd_plrn ! loop over all Bloch state n
+                     indexkn1 = (icount - 1) * nbndsub + ibnd !mp
+                     indexkn2 = (ik_global - 1) * nbnd_plrn + jbnd !nk
+                     SELECT CASE(itype)
+                        CASE(1)  ! Bloch2Wan !
+                           eigVecOut(indexkn1, iplrn) = eigVecOut(indexkn1, iplrn) + &
+                              eigVecIn(indexkn2, iplrn) * ctemp/nktotf * cufkk(ibnd, select_bands_plrn(jbnd)) !JLB: Conjugate transpose taken above!
+                        CASE(-1) ! Wan2Bloch !
+                           eigVecOut(indexkn2, iplrn) = eigVecOut(indexkn2, iplrn) + &
+                              eigVecIn(indexkn1, iplrn) * CONJG(ctemp) * cufkk(select_bands_plrn(jbnd), ibnd) !JLB
+                     END SELECT
+                  END DO ! jbnd
+               END DO ! ibnd
+            END DO
+         END DO !iplrn
+      END DO ! ik
+      ! MPI sum due to the loop ik is within local k set
+      CALL mp_sum( eigVecOut, inter_pool_comm )
+   END SUBROUTINE
+   !
+   !-----------------------------------------------------------------------
+   !! JLB: Fourier transform for non-diagonal supercells
+   SUBROUTINE scell_plrn_eigvec_tran(ttype, t_rev, eigVecIn, nktotf_p, nRp_p, Rp_p, nbndsub_p, &
+         nrr_k, ndegen_k, irvec_r, dims, eigVecOut)
+      USE constants_epw, ONLY : czero, twopi, ci, cone, two
+      USE elph2,         ONLY : nkf, xkf, etf, chw, nktotf
+      USE epwcom,        ONLY : nstate_plrn, nbndsub, time_rev_A_plrn
+      USE epwcom,        ONLY : scell_mat_plrn
+      USE wan2bloch,     ONLY : hamwan2bloch !!=> hamwan2bloch_old
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE mp_world,      ONLY : mpime
+      USE io_global,     ONLY : stdout, ionode
+
+      IMPLICIT NONE
+
+      COMPLEX(DP), INTENT(IN) :: eigvecIn(:, :)
+      LOGICAL,     INTENT(IN) :: t_rev
+      INTEGER,     INTENT(IN) :: nktotf_p, nRp_p, Rp_p(:,:), nbndsub_p
+      INTEGER,     INTENT(IN) :: nrr_k, dims, ndegen_k(:,:,:) ! ! Added for polaron calculations by Chao Lian.
+      REAL(DP),    INTENT(IN) :: irvec_r(3, nrr_k)
+      CHARACTER(LEN=9), INTENT(IN) :: ttype
+
+      COMPLEX(DP), INTENT(OUT) :: eigVecOut(:, :)
+      COMPLEX(KIND=dp) :: expTable(3)
+      INTEGER :: idir
+
+      REAL(DP)    :: rtemp, xxk(3), shift(3), etf_tmp(nbndsub)
+      REAL(DP) :: phi, maxreal !JLB
+      COMPLEX(DP) :: ctemp, cufkk(nbndsub, nbndsub), cfac(nrr_k, dims, dims), cufkk_k(nbndsub, nbndsub, nktotf), phase !JLB
+      COMPLEX(DP), ALLOCATABLE :: cufkkg ( :, :, :)
+      INTEGER     :: itype, iq, inu, ik, ikk, ikq, ik_global, iplrn, ikpg, iRp
+      INTEGER     :: ibnd, jbnd, ix, iy, iz, indexkn1, indexkn2
+      LOGICAL     :: is_mirror
+
+      IF(nbndsub_p /= nbndsub) CALL errore('plrnwfwan2bloch','Different bands included in last calculation!',1)
+      IF(ttype == 'Bloch2Wan') THEN
+         itype =  1
+      ELSE IF (ttype == 'Wan2Bloch') THEN
+         itype = -1
+      ELSE
+         CALL errore('plrn_eigvec_tran', 'Illegal translate form; should be Bloch2Wan or Wan2Bloch!', 1)
+      END IF
+
+      !! itype =  1 : Bloch2Wan: A_{mp} =  \frac{1}{N_p} \sum_{nk}A_{nk} \exp\left(ik\cdot R_p\right)U^\dagger_{mnk}
+      !! itype = -1 : Wan2Bloch: A_{nk} = \sum_{mp}A_{mp}\exp(-ik\cdot R_p) U_{mnk}
+      !! ibnd -> m, jbnd -> n
+      !! R_p from 1 to nktotf_p
+      !! This sequence need to be consistent every time transpose between eigvec_wann and eigvec
+      eigVecOut = czero
+      DO ik = 1, nkf
+         xxk = xkf(1:3, 2 * ik - 1)
+         ik_global = ikqLocal2Global(ik, nktotf)
+         is_mirror = (t_rev .AND. (ik_global > ikpg))
+         !
+         CALL get_cfac(xxk, nrr_k, ndegen_k, irvec_r, dims, cfac)
+         CALL hamwan2bloch ( nbndsub, nrr_k, cufkk(1:nbndsub, 1:nbndsub), &
+            etf_tmp, chw, cfac, dims, is_mirror)
+         IF(itype == 1) cufkk(1:nbndsub, 1:nbndsub) = CONJG(TRANSPOSE(cufkk(1:nbndsub, 1:nbndsub)))
+         !
+         DO iplrn = 1, nstate_plrn
+            !icount = 0
+            !! loop over all Wannier position p
+            DO iRp = 1, nRp_p
+               ctemp = EXP(twopi * ci * DOT_PRODUCT(xxk, Rp_p(1:3, iRp)))
+               DO ibnd = 1, nbndsub_p ! loop over all Wannier state m
+                  DO jbnd = 1, nbnd_plrn ! loop over all Bloch state n
+                     indexkn1 = (iRp - 1) * nbndsub + ibnd !mp
+                     indexkn2 = (ik_global - 1) * nbnd_plrn + jbnd !nk
+                     SELECT CASE(itype)
+                        CASE(1)  ! Bloch2Wan !
+                           eigVecOut(indexkn1, iplrn) = eigVecOut(indexkn1, iplrn) + &
+                              eigVecIn(indexkn2, iplrn) * ctemp/nktotf * cufkk(ibnd, select_bands_plrn(jbnd))
+                        CASE(-1) ! Wan2Bloch !
+                           eigVecOut(indexkn2, iplrn) = eigVecOut(indexkn2, iplrn) + &
+                              eigVecIn(indexkn1, iplrn) * CONJG(ctemp) * cufkk(select_bands_plrn(jbnd), ibnd)
+                     END SELECT
+                  END DO ! jbnd
+               END DO ! ibnd
+            END DO
+         END DO !iplrn
+      END DO ! ik
+      ! MPI sum due to the loop ik is within local k set
+      CALL mp_sum( eigVecOut, inter_pool_comm )
+      !
+   END SUBROUTINE
+   !!JLB
+   !-----------------------------------------------------------------------
+   !! Interpolate Ank and write to Ank.band.plrn,
+   !! especially used to visualize phonon contribution to polaron in band-mode
+   SUBROUTINE interp_plrn_wf(nrr_k, ndegen_k, irvec_r, dims)
+      USE constants_epw, ONLY : zero, ryd2ev, czero
+      USE io_global,     ONLY : stdout, ionode
+      USE epwcom,        ONLY : type_plrn, nbndsub, nstate_plrn
+      USE elph2,         ONLY : nktotf, etf
+
+      IMPLICIT NONE
+
+      INTEGER, INTENT (IN) :: nrr_k, dims, ndegen_k(:,:,:) ! ! Added for polaron calculations by Chao Lian.
+      REAL(DP), INTENT (IN) :: irvec_r(3, nrr_k)
+
+      COMPLEX(DP), ALLOCATABLE :: eigvec_wan(:, :)
+      REAL(DP),   ALLOCATABLE :: dtau_r(:, :)
+      INTEGER :: iRp, Rp_vec(3), i_center(2), ip_center(3)
+
+      INTEGER  :: nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p
+      INTEGER  :: ik_bm, band_pos, ierr
+      REAL(DP) :: efermi
+
+      IF(ionode) WRITE(stdout, "(5x, a)") "Start of interpolation of electronic band structure."
+      IF(.NOT. ALLOCATED(etf_all)) THEN
+         CALL errore('interp_plrn_wf','etf_all should be correctly prepared before calling interp_plrn_wf', 1)
+      END IF
+
+      !!FIXME: When selecting band in solving polaron, nbndsub_p should be changed when output
+      CALL read_plrn_wf(eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p, 'Amp.plrn')
+
+      i_center = MAXLOC(ABS(eigvec_wan))
+
+      ip_center = index_Rp(i_center(1)/nbndsub_p + 1, (/nkf1_p, nkf2_p, nkf3_p/))
+      WRITE(stdout, '(5x, a, i8, 3i5)') "The largest Amp ", i_center(1), ip_center
+
+      CALL plrn_eigvec_tran('Wan2Bloch', .false., eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nbndsub_p, &
+         nrr_k, ndegen_k, irvec_r, dims, eigVec, ip_center)
+
+      CALL write_plrn_wf(eigVec, 'Ank.band.plrn', etf_all)
+
+      IF(ALLOCATED(eigvec_wan)) DEALLOCATE(eigvec_wan)
+   END SUBROUTINE
+   !
+   !!-----------------------------------------------------------------------
+   !! Interpolate bmat and write to Bmat.band.plrn,
+   !! especially used to visualize phonon contribution to polaron in band-mode
+   SUBROUTINE interp_plrn_bq(nrr_q, ndegen_q, irvec_q, rws, nrws)
+      USE elph2,         ONLY : xqf, wf, nqtotf
+      USE modes,         ONLY : nmodes
+      USE constants_epw, ONLY : czero
+      USE io_global,     ONLY : stdout, ionode
+
+      IMPLICIT NONE
+      INTEGER, INTENT (IN) :: nrr_q, ndegen_q(:,:,:) ! ! Added for polaron calculations by Chao Lian.
+      INTEGER, INTENT (IN) :: irvec_q(3, nrr_q)
+      INTEGER,  INTENT (IN) :: nrws
+      REAL(DP), INTENT (IN) :: rws(:, :)
+
+      INTEGER :: nqf1_p, nqf2_p, nqf3_p, nqtotf_p, nmodes_p, ierr
+      INTEGER :: iRp, ina, Rp_vec(3), i_center(2), ip_center(3)
+
+      COMPLEX(DP), ALLOCATABLE :: Bmat(:,:)
+      COMPLEX(DP), ALLOCATABLE :: dtau(:, :)
+      REAL(DP),    ALLOCATABLE :: dtau_r(:, :)
+
+      CALL read_plrn_dtau(dtau, nqf1_p, nqf2_p, nqf3_p, nqtotf_p, nmodes_p, 'dtau.plrn')
+
+      ALLOCATE(dtau_r(nqtotf_p, nmodes/3), STAT = ierr)
+      IF (ierr /= 0) CALL errore('interp_plrn_bq', 'Error allocating Bmat', 1)
+      dtau_r = czero
+      DO iRp = 1, nqtotf_p
+         DO ina = 1, nmodes/3 ! ika -> kappa alpha
+            dtau_r(iRp, ina) = NORM2(REAL(dtau(iRp, (ina-1)*3+1:ina*3)))
+         END DO
+      END DO
+      i_center = MAXLOC(ABS(dtau_r))
+      ip_center = index_Rp(i_center(1), (/nqf1_p, nqf2_p, nqf3_p/))
+
+      ALLOCATE(Bmat(nqtotf, nmodes), STAT = ierr)
+      IF (ierr /= 0) CALL errore('interp_plrn_bq', 'Error allocating Bmat', 1)
+      Bmat = czero
+
+      CALL plrn_bmat_tran('Dtau2Bmat', .false., dtau, nqf1_p, nqf2_p, nqf3_p, &
+         nrr_q, ndegen_q, irvec_q, rws, nrws, Bmat, ip_center)
+
+      IF(ionode) CALL write_plrn_bmat(Bmat, 'Bmat.band.plrn', wf)
+
+      DEALLOCATE(dtau)
+      DEALLOCATE(Bmat)
+      DEALLOCATE(dtau_r)
+   END SUBROUTINE
+   !
+   !!-----------------------------------------------------------------------
+   !! Fourier transform between Bmat and dtau
+   !! Dtau2Bmat : B_{q\nu} = -1/N_p\sum_{\kappa\alpha p}C_{q\kappa \nu}\Delta\tau_{\kappa\alpha p}  e_{\kappa\alpha\nu}(q)\exp(iqR_p)
+   !! Bmat2Dtau : \Delta \tau_{\kappa\alpha p} = -\sum_{q\nu} 1/(C_{q\kappa \nu}) B^*_{q\nu} e_{\kappa\alpha,\nu}(q) \exp(iqR_p)
+   !! C_{q\kappa \nu} = N_p\left(\frac{M_k\omega_{q\nu}}{2\hbar}\right)^{\frac{1}{2}} = N_p(M_k)^{\frac{1}{2}}D_{q\nu}
+   !! D_{q \nu} = \left(\frac{\omega_{q\nu}}{2\hbar}\right)^{\frac{1}{2}}
+   SUBROUTINE plrn_bmat_tran(ttype, t_rev, mat_in, nqf1_p, nqf2_p, nqf3_p, &
+         nrr_q, ndegen_q, irvec_q, rws, nrws, mat_out, ip_center)
+      USE elph2,         ONLY : xqf, nqtotf, nkf, wf
+      USE modes,         ONLY : nmodes
+      USE constants_epw, ONLY : eps8, czero, one, two, twopi, zero, ci, cone
+      USE ions_base,     ONLY : amass, ityp
+      USE wan2bloch,     ONLY : dynwan2bloch, dynifc2blochf
+      USE epwcom,        ONLY : lifc
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE division,      ONLY : fkbounds
+      USE io_global,     ONLY : stdout
+      USE epwcom,        ONLY : type_plrn
+
+      IMPLICIT NONE
+
+      CHARACTER(LEN=9), INTENT(IN) :: ttype
+      INTEGER, INTENT (IN) :: nqf1_p, nqf2_p, nqf3_p
+      LOGICAL, INTENT (IN) :: t_rev
+      COMPLEX(DP), INTENT(IN) :: mat_in(:, :)
+      INTEGER, INTENT (IN) :: nrr_q, ndegen_q(:,:,:)
+      INTEGER, INTENT (IN) :: irvec_q(3, nrr_q)
+      INTEGER,  INTENT (IN) :: nrws
+      REAL(DP), INTENT (IN) :: rws(:, :)
+      COMPLEX(DP), INTENT(OUT) :: mat_out(:, :)
+      INTEGER, INTENT(IN), OPTIONAL :: ip_center(1:3)
+      !JLB
+      !LOGICAL, OPTIONAL, INTENT(IN) :: readpol
+      LOGICAL     :: mirror_q
+      INTEGER     :: iq, inu, ierr, imu, iatm, idir, itype, ika, ip_start, ip_end, iRp, nqf_p(1:3)
+      INTEGER     :: ix, iy, iz, ina, nqtotf_p, iqpg, nqf, nptotf, start_modes, Rp_vec(1:3), center_shift(1:3)
+      COMPLEX(DP) :: dtemp, shift(3), expTable(3), uf(nmodes, nmodes)
+      REAL(DP)    :: xxq(3), xxq_r(3), ctemp, w2(nmodes)!, wf(:,:)
+      !JLB
+      INTEGER :: jnu, ndegen(nmodes), imode, jmode
+
+      nptotf = nqf1_p * nqf2_p * nqf3_p
+      nqf_p(1:3) = (/nqf1_p, nqf2_p, nqf3_p/)
+
+      IF(nptotf <= 0) CALL errore('plrn_eigvec_tran', 'Use correct .plrn file with nqf1_p \= 0!', 1)
+      IF(ttype == 'Bmat2Dtau') THEN
+         itype =  1
+      ELSE IF (ttype == 'Dtau2Bmat') THEN
+         itype = -1
+      ELSE
+         CALL errore('plrn_eigvec_tran', 'Illegal translation form; should be Bmat2Dtau or Dtau2Bmat!', 1)
+      END IF
+
+      uf = czero
+      w2 = zero
+      wf = zero
+
+      mat_out = czero
+
+      CALL fkbounds(nptotf, ip_start, ip_end)
+
+      DO iq = 1, nqtotf ! iq -> q
+         xxq = xqf(1:3, iq)
+         xxq_r = xxq(1:3)
+         mirror_q = .false.
+         ! if we need to force the time-rev symmetry, we have to ensure that the phase of uf is fixed
+         ! i.e. uf = uf*(-q)
+         IF (t_rev) THEN
+            IF (is_mirror_q (iq)) THEN
+               xxq_r = xqf(1:3, kpg_map(iq))
+               mirror_q = .true.
+            END IF
+         END IF
+         expTable(1:3) = EXP( twopi * ci * xxq(1:3) )
+
+         ! Get phonon eigenmode and eigenfrequencies
+         IF (.NOT. lifc) THEN
+            ! Incompatible bugs found 9/4/2020 originated from the latest EPW changes.
+            ! parallel q is not working any more due to mp_sum in rgd_blk
+            CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq_r, uf, w2, mirror_q)
+         ELSE
+            CALL dynifc2blochf(nmodes, rws, nrws, xxq_r, uf, w2, mirror_q)
+         ENDIF
+
+         DO inu = 1, nmodes
+            IF (w2(inu) > -eps8) THEN
+               wf(inu, iq) =  DSQRT(ABS(w2(inu)))
+            ELSE
+               !wf(inu, iq) = -DSQRT(ABS(w2(inu)))
+               wf(inu, iq) = 0.d0
+            ENDIF
+         END DO
+
+         IF(PRESENT(ip_center)) THEN
+            center_shift(1:3) = nqf_p/2 - ip_center
+            !write(stdout, *) center_shift, ip_center
+         ELSE
+            center_shift(1:3) = 0
+         END IF
+         ! For mirror q, calculate the time-symmetric q' and get uf from q'
+         ! e_{\kappa\alpha\nu}(-q)= e^*_{\kappa\alpha\nu}(q)
+         !!IF(t_rev .and. iq > iqpg) uf = CONJG(uf) !transpose
+         start_modes = 1
+         DO inu = start_modes, nmodes ! inu -> nu
+            IF (wf(inu, iq) < eps8) CYCLE !JLB - cycle zero and imaginary frequency modes
+            DO ika = 1, nmodes ! ika -> kappa alpha
+               ina = (ika - 1) / 3 + 1
+               ctemp = DSQRT(two/(wf(inu, iq) * amass(ityp(ina))))
+               ! Parallel run, only calculate the local cell ip
+               ! Note that, ip_end obtained from fkbounds should be included
+               ! If you have 19 kpts and 2 pool,
+               ! lower_bnd= 1 and upper_bnd=10 for the first pool
+               ! lower_bnd= 1 and upper_bnd=9 for the second pool
+               DO iRp = ip_start, ip_end !, (nqf1_p + 1)/2
+                  Rp_vec(1:3) = MODULO(index_Rp(iRp, nqf_p) + center_shift, nqf_p)
+                  ! D_{\kappa\alpha\nu,p}(q) = e_{\kappa\alpha,\nu}(q) \exp(iq\cdot R_p)
+                  !dtemp = uf_q(ika, inu, iq) * PRODUCT(expTable(1:3)**Rp_vec(1:3))
+                  dtemp = uf(ika, inu) * PRODUCT(expTable(1:3)**Rp_vec(1:3))
+                  IF (itype == 1) THEN ! Bqv -> dtau
+                     ! \Delta \tau_{\kappa\alpha p} = -\frac{1}{N_p} \sum_{q\nu} C_{\kappa\nu q} D_{\kappa\alpha\nu q}  B^*_{q\nu}
+                     ! Dtau(iRp, ika) = Dtau(iRp, ika) + conjg(B(iq, inu)) * ctemp * dtemp
+                     mat_out(iRp, ika) = mat_out(iRp, ika) -  cone/REAL(nptotf, dp) * dtemp * ctemp &
+                        * (-type_plrn) * CONJG(mat_in(iq, inu))
+                  ELSE IF(itype == -1) THEN
+                     !  B_{q\nu} = \frac{1}{N_p} \sum_{\kappa\alpha p} D_{\kappa \alpha\nu, p}(q) C_{q}\nu \Delta\tau_{\kappa\alpha p}
+                     mat_out(iq, inu) = mat_out(iq, inu) - (-type_plrn) * dtemp/ctemp * CONJG(mat_in(iRp, ika)) !JLB: dtau should be real but just in case
+                     !mat_out(iq, inu) = mat_out(iq, inu) - (-type_plrn) * dtemp/ctemp * mat_in(iRp, ika)
+                  END IF
+               END DO
+            END DO
+         END DO
+      END DO
+      ! sum all the cell index ip
+      CALL mp_sum(mat_out, inter_pool_comm )
+
+   END SUBROUTINE
+   !!-----------------------------------------------------------------------
+   !! JLB: Fourier transform between Bmat and dtau for non-diagonal supercells
+   SUBROUTINE scell_plrn_bmat_tran(ttype, t_rev, mat_in, nqtotf_p, nRp_p, Rp_p, &
+         nrr_q, ndegen_q, irvec_q, rws, nrws, mat_out)
+      USE elph2,         ONLY : xqf, nqtotf, nkf, wf
+      USE modes,         ONLY : nmodes
+      USE constants_epw, ONLY : eps8, czero, one, two, twopi, zero, ci, cone
+      USE ions_base,     ONLY : amass, ityp
+      USE wan2bloch,     ONLY : dynwan2bloch, dynifc2blochf
+      USE epwcom,        ONLY : lifc
+      USE mp_global,     ONLY : inter_pool_comm
+      USE mp,            ONLY : mp_sum
+      USE division,      ONLY : fkbounds
+      USE io_global,     ONLY : stdout, ionode
+      USE epwcom,        ONLY : type_plrn
+
+      IMPLICIT NONE
+
+      CHARACTER(LEN=9), INTENT(IN) :: ttype
+      INTEGER, INTENT (IN) :: nqtotf_p, nRp_p, Rp_p(:,:)
+      LOGICAL, INTENT (IN) :: t_rev
+      COMPLEX(DP), INTENT(IN) :: mat_in(:, :)
+      INTEGER, INTENT (IN) :: nrr_q, ndegen_q(:,:,:)
+      INTEGER, INTENT (IN) :: irvec_q(3, nrr_q)
+      INTEGER,  INTENT (IN) :: nrws
+      REAL(DP), INTENT (IN) :: rws(:, :)
+      COMPLEX(DP), INTENT(OUT) :: mat_out(:, :)
+      LOGICAL     :: mirror_q
+      INTEGER     :: iq, inu, ierr, imu, iatm, idir, itype, ika, ip_start, ip_end, iRp
+      INTEGER     :: ix, iy, iz, ina, iqpg, nqf, start_modes
+      COMPLEX(DP) :: dtemp, uf(nmodes, nmodes)
+      REAL(DP)    :: xxq(3), xxq_r(3), ctemp, w2(nmodes)!, wf(:,:)
+      INTEGER     :: jnu, ndegen(nmodes), imode, jmode
+
+      IF(ttype == 'Bmat2Dtau') THEN
+         itype =  1
+      ELSE IF (ttype == 'Dtau2Bmat') THEN
+         itype = -1
+      ELSE
+         CALL errore('plrn_eigvec_tran', 'Illegal translation form; should be Bmat2Dtau or Dtau2Bmat!', 1)
+      END IF
+
+      uf = czero
+      w2 = zero
+      wf = zero
+
+      mat_out = czero
+
+      CALL fkbounds(nRp_p, ip_start, ip_end)
+
+      DO iq = 1, nqtotf_p ! iq -> q
+         xxq = xqf(1:3, iq)
+         xxq_r = xxq(1:3)
+         mirror_q = .false.
+         ! if we need to force the time-rev symmetry, we have to ensure that the phase of uf is fixed
+         ! i.e. uf = uf*(-q)
+         IF (t_rev) THEN
+            IF (is_mirror_q (iq)) THEN
+               xxq_r = xqf(1:3, kpg_map(iq))
+               mirror_q = .true.
+            END IF
+         END IF
+
+         ! Get phonon eigenmode and eigenfrequencies
+         IF (.NOT. lifc) THEN
+            ! Incompatible bugs found 9/4/2020 originated from the latest EPW changes.
+            ! parallel q is not working any more due to mp_sum in rgd_blk
+            CALL dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq_r, uf, w2, mirror_q)
+         ELSE
+            CALL dynifc2blochf(nmodes, rws, nrws, xxq_r, uf, w2, mirror_q)
+         ENDIF
+
+         DO inu = 1, nmodes
+            IF (w2(inu) > -eps8) THEN
+               wf(inu, iq) =  DSQRT(ABS(w2(inu)))
+            ELSE
+               !wf(inu, iq) = -DSQRT(ABS(w2(inu)))
+               wf(inu, iq) = 0.d0
+            ENDIF
+         END DO
+
+         ! For mirror q, calculate the time-symmetric q' and get uf from q'
+         ! e_{\kappa\alpha\nu}(-q)= e^*_{\kappa\alpha\nu}(q)
+         !!IF(t_rev .and. iq > iqpg) uf = CONJG(uf) !transpose
+         DO inu = 1, nmodes ! inu -> nu
+            IF (wf(inu, iq) < eps8) CYCLE !JLB - cycle zero and imaginary frequency modes
+            DO ika = 1, nmodes ! ika -> kappa alpha
+               ina = (ika - 1) / 3 + 1
+               ctemp = DSQRT(two/(wf(inu, iq) * amass(ityp(ina))))
+               !
+               !DO iRp = 1, nRp
+               DO iRp = ip_start, ip_end
+                  ! D_{\kappa\alpha\nu,p}(q) = e_{\kappa\alpha,\nu}(q) \exp(iq\cdot R_p)
+                  dtemp = uf(ika, inu) * EXP( twopi * ci * DOT_PRODUCT(xxq(1:3), Rp_p(1:3, iRp)) )
+                  IF (itype == 1) THEN ! Bqv -> dtau
+                     ! \Delta \tau_{\kappa\alpha p} = -\frac{1}{N_p} \sum_{q\nu} C_{\kappa\nu q} D_{\kappa\alpha\nu q}  B^*_{q\nu}
+                     ! Dtau(iRp, ika) = Dtau(iRp, ika) + conjg(B(iq, inu)) * ctemp * dtemp
+                     mat_out(iRp, ika) = mat_out(iRp, ika) -  cone/REAL(nRp, dp) * dtemp * ctemp &
+                        * (-type_plrn) * CONJG(mat_in(iq, inu))
+                  ELSE IF(itype == -1) THEN
+                     !  B_{q\nu} = \frac{1}{N_p} \sum_{\kappa\alpha p} D_{\kappa \alpha\nu, p}(q) C_{q}\nu \Delta\tau_{\kappa\alpha p}
+                     mat_out(iq, inu) = mat_out(iq, inu) - (-type_plrn) * dtemp/ctemp * CONJG(mat_in(iRp, ika)) !JLB: dtau should be real but just in case
+                     !mat_out(iq, inu) = mat_out(iq, inu) - (-type_plrn) * dtemp/ctemp * mat_in(iRp, ika)
+                  END IF
+               END DO
+            END DO
+         END DO
+      END DO
+      ! sum all the cell index ip
+      CALL mp_sum(mat_out, inter_pool_comm )
+      !
+   END SUBROUTINE
+   !-----------------------------------------------------------------------
+   SUBROUTINE calc_den_of_state(eigVec, Bmat)
+      USE epwcom,        ONLY : nDOS_plrn, edos_max_plrn, edos_min_plrn, edos_sigma_plrn
+      USE epwcom,        ONLY : pdos_max_plrn, pdos_min_plrn, pdos_sigma_plrn
+      USE epwcom,        ONLY : nstate_plrn, nkf1, nkf2, nkf3
+      USE elph2,         ONLY : nkf, nqtotf, nktotf, xkf
+      USE elph2,         ONLY : ibndmin, ibndmax, wf
+      USE modes,         ONLY : nmodes
+
+      USE constants_epw, ONLY : ryd2mev, czero, one, ryd2ev, two, zero, cone
+      USE constants_epw, ONLY : pi, ci, twopi, eps6, eps8, eps5
+
+      IMPLICIT NONE
+
+      INTEGER     :: idos
+      INTEGER     :: iq, inu, ik, ikk, ikq, ik_global, iplrn, ikpg, icount
+      INTEGER     :: ibnd, jbnd, ix, iy, iz
+      INTEGER     :: dos_file, indexkn1
+
+      COMPLEX(DP), INTENT(IN) :: eigVec(:, :), Bmat(:, :)
+
+      REAL(DP), ALLOCATABLE :: rmat_tmp(:, :), rvec_tmp(:)
+      REAL(DP), ALLOCATABLE :: edos(:), pdos(:), edos_all(:), pdos_all(:), e_grid(:), p_grid(:)
+      REAL(DP) :: temp
+
+      !Calculating DOS
+      ALLOCATE(rvec_tmp(nDOS_plrn))
+      ALLOCATE(e_grid(nDOS_plrn))
+      ALLOCATE(edos(nDOS_plrn))
+      ALLOCATE(edos_all(nDOS_plrn))
+      temp = MAXVAL(etf_all) * ryd2ev + one
+      IF(edos_max_plrn < temp) edos_max_plrn = temp
+      temp = MINVAL(etf_all) * ryd2ev - one
+      IF(edos_min_plrn > temp) edos_min_plrn = temp
+
+      e_grid = zero
+      DO idos = 1, nDOS_plrn
+         e_grid(idos) = edos_min_plrn + idos*(edos_max_plrn - edos_min_plrn)/(nDOS_plrn)
+      END DO
+
+      edos = zero
+      edos_all = zero
+      DO ik = 1, nktotf
+         DO ibnd = 1, nbnd_plrn
+            ! TODO : iplrn
+            DO iplrn = 1, 1
+               CALL cal_f_delta(e_grid - (etf_all(select_bands_plrn(ibnd), ik) * ryd2ev), &
+                  edos_sigma_plrn, rvec_tmp)
+               indexkn1 = (ik-1)*nbnd_plrn + ibnd
+               edos = edos + (ABS(eigVec(indexkn1, iplrn))**2)*rvec_tmp
+               edos_all = edos_all + rvec_tmp
+            END DO
+         END DO
+      END DO
+
+      ALLOCATE(p_grid(nDOS_plrn))
+      ALLOCATE(pdos(nDOS_plrn))
+      ALLOCATE(pdos_all(nDOS_plrn))
+
+      temp = MAXVAL(wf) * ryd2mev + 10.0_dp
+      IF(pdos_max_plrn < temp) pdos_max_plrn = temp
+      temp = MINVAL(wf) * ryd2mev - 10.0_dp
+      IF(pdos_min_plrn > temp) pdos_min_plrn = temp
+
+      p_grid = zero
+      DO idos = 1, nDOS_plrn
+         p_grid(idos) = pdos_min_plrn + idos*(pdos_max_plrn - pdos_min_plrn)/(nDOS_plrn)
+      END DO
+
+      pdos = zero
+      pdos_all = zero
+      DO iq = 1, nqtotf
+         DO inu = 1, nmodes
+            CALL cal_f_delta(p_grid - wf(inu, iq) * ryd2mev, pdos_sigma_plrn, rvec_tmp)
+            pdos = pdos + (ABS(Bmat(iq, inu))**2)*rvec_tmp
+            pdos_all = pdos_all + rvec_tmp
+         END DO
+      END DO
+
+      dos_file = 601
+      OPEN(UNIT = dos_file, FILE = 'dos.plrn')
+      WRITE(dos_file, '(/2x, a/)') '#energy(ev)  A^2   edos  energy(mev)  B^2  pdos'
+      DO idos = 1, nDOS_plrn
+         WRITE(dos_file, '(6f15.7)') e_grid(idos), edos(idos), &
+            edos_all(idos), p_grid(idos), pdos(idos), pdos_all(idos)
+      END DO
+      CLOSE(dos_file)
+
+      DEALLOCATE(e_grid, edos, edos_all)
+      DEALLOCATE(p_grid, pdos, pdos_all)
+      DEALLOCATE(rvec_tmp)
+   END SUBROUTINE
+   !
+   SUBROUTINE write_real_space_wavefunction()
+      USE constants_epw, ONLY : zero, czero, cone, twopi, ci, bohr2ang
+      USE epwcom,        ONLY : nbndsub, step_wf_grid_plrn
+      USE io_var,        ONLY : iun_plot
+      USE io_files,      ONLY : prefix
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE cell_base,     ONLY : at, alat
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE mp_world,      ONLY : world_comm
+      USE division,      ONLY : fkbounds
+      USE mp_global,     ONLY : inter_pool_comm
+
+      IMPLICIT NONE
+
+      INTEGER :: nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbnd_plrn_p
+      INTEGER :: nqf_p(3), nqtotf_p, nmodes_p
+      INTEGER :: ibnd, jbnd, iline, nAtoms, idir, file_unit, igrid, itemp, indexkn1
+      INTEGER :: nxx, nyy, nzz, ipx, ipy, ipz, ip_min, ip_max, ig_vec(1:3)
+      INTEGER :: iscx, iscy, iscz, ie, i_species, ivec, iRp
+      INTEGER :: n_grid(3), grid_start(3), grid_end(3), n_grid_super(3), r_grid_vec(3)
+      INTEGER :: rpc(1:3), ipc(1:3), shift_start(1:3), species(50), Rp_vec(1:3), shift(1:3), ishift
+      COMPLEX(dp), ALLOCATABLE :: eigvec_wan(:, :), dtau(:, :), cvec(:)
+      COMPLEX(dp) :: Amp, ctemp(1:3), b_vec(1:3)
+      CHARACTER(LEN = 60) :: plrn_file
+      REAL(dp), ALLOCATABLE :: wann_func(:, :, :, :), rvec(:)
+      REAL(dp) :: orig(3), rtempvec(4), cell(3, 3), rtemp(5), tempDen(5,5,5)
+      REAL(dp) :: r_cry(3), r_cart(3)
+
+      file_unit = 60512
+      ! read Amp.plrn, save eigvec_wan for the latter use
+      CALL read_plrn_wf(eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbnd_plrn_p, 'Amp.plrn')
+      ! read dtau.plrn, get the displacement.
+      CALL read_plrn_dtau(dtau, nqf_p(1), nqf_p(2), nqf_p(3), nqtotf_p, nmodes_p, 'dtau.plrn')
+      ! read cube files for the real-space Wannier function Wm(r)
+      CALL read_wannier_cube(select_bands_plrn, wann_func, species, &
+         n_grid, grid_start, grid_end)
+
+      cell(1:3, 1) = at(1:3, 1) * nqf_p(1) * alat
+      cell(1:3, 2) = at(1:3, 2) * nqf_p(2) * alat
+      cell(1:3, 3) = at(1:3, 3) * nqf_p(3) * alat
+
+      plrn_file = 'psir_plrn.xsf'
+      ! Write the file head including information of structures,
+      ! using the same format of
+      CALL write_plrn_dtau_xsf(dtau, nqf_p(1), nqf_p(2), nqf_p(3), plrn_file, species)
+
+      orig(1:3) = zero
+      n_grid_super(1:3) = nqf_p(1:3) * n_grid(1:3)
+
+      IF(ionode) THEN
+         OPEN(UNIT = file_unit, FILE = TRIM(plrn_file), POSITION='APPEND')
+         WRITE (file_unit, '(/)')
+         WRITE (file_unit, '("BEGIN_BLOCK_DATAGRID_3D",/,"3D_field",/, "BEGIN_DATAGRID_3D_UNKNOWN")')
+         WRITE (file_unit, '(3i6)')  n_grid_super / step_wf_grid_plrn
+         WRITE (file_unit, '(3f12.6)') zero, zero, zero
+         WRITE (file_unit, '(3f12.7)') cell(1:3, 1)*bohr2ang
+         WRITE (file_unit, '(3f12.7)') cell(1:3, 2)*bohr2ang
+         WRITE (file_unit, '(3f12.7)') cell(1:3, 3)*bohr2ang
+      END IF
+
+      b_vec(1:3) = twopi * ci / REAL(n_grid_super(1:3))
+
+
+      !allocate(cvec(grid_start_min(1): n_grid_super(1) + grid_start_min(1) - 1))
+      ALLOCATE(cvec(1:n_grid_super(1)))
+
+      CALL fkbounds(nqtotf_p, ip_min, ip_max)
+
+      !      do nzz = grid_start_min(3), n_grid_super(3) + grid_start_min(3) - 1
+      !         do nyy = grid_start_min(2), n_grid_super(2) + grid_start_min(2) - 1
+      !            cvec = czero
+      !            do nxx = grid_start_min(1), n_grid_super(1) + grid_start_min(1) - 1
+      ctemp = czero
+      rtemp = czero
+      DO nzz = 1, n_grid_super(3), step_wf_grid_plrn
+         !IF (MOD(nzz - 1, step_wf_grid_plrn)/=0) CYCLE
+         DO nyy = 1, n_grid_super(2), step_wf_grid_plrn
+            !IF (MOD(nyy - 1, step_wf_grid_plrn)/=0) CYCLE
+            cvec = czero
+            DO nxx = 1, n_grid_super(1), step_wf_grid_plrn
+               !IF (MOD(nxx - 1, step_wf_grid_plrn)/=0) CYCLE
+               !icount = 0
+               DO iRp = ip_min, ip_max !-nqf_p(3)/2, (nqf_p(3)+1)/2
+                  Rp_vec(1:3) = index_Rp(iRp, nqf_p)
+                  rpc(1:3) = Rp_vec(1:3) * n_grid(1:3) !- (n_grid_super(1:3))/2
+                  ! To make sure that all the nonzero points are included,
+                  ! we need to try from -1 to 1 neighbor supercells
+                  DO ishift = 1, 27
+                     shift(1:3) = index_shift(ishift)
+                     ig_vec(1:3) = (/nxx, nyy, nzz/) - rpc(1:3) + shift(1:3) * n_grid_super(1:3)
+                     IF(ALL(ig_vec(1:3) <= grid_end(1:3)) .and. &
+                        ALL(ig_vec(1:3) >= grid_start(1:3))) THEN
+                        !print *,"igx, igy, igz, nxx, nyy, nzz, ipx, ipy, ipz:",igx, igy, igz, nxx, nyy, nzz, ipx, ipy, ipz
+                        DO ibnd = 1, nbndsub !TODO change to nbndsub
+                           !cvec(nxx) = cvec(nxx) + wann_func(igx, igy, igz, ibnd)
+                           indexkn1 = (iRp - 1) * nbndsub + ibnd
+                           !TODO eigvec_wan(indexkn1, 1) should be eigvec_wan(indexkn1, iplrn)
+                           cvec(nxx) = cvec(nxx)  +  eigvec_wan(indexkn1, 1) * wann_func(ig_vec(1), ig_vec(2), ig_vec(3), ibnd)
+                           !cvec(nxx) = cvec(nxx)  +  wann_func(igx, igy, igz, ibnd) !*  eigvec_wan(indexkn1, 1)
+                        END DO !ibnd
+                     END IF
+                        !END DO ! iscz
+                     !END DO ! iscy
+                  END DO ! iscx
+                     !END DO ! ipz
+                  !ND DO ! ipy
+               END DO ! ipx
+            END DO ! nxx
+            CALL mp_sum(cvec, inter_pool_comm)
+            IF(ionode) THEN
+               !WRITE (file_unit, '(5e13.5)', ADVANCE='yes') REAL(cvec(::step_wf_grid_plrn))
+               !JLB: Changed to |\Psi(r)|^{2}, I think it's physically more meaningful
+               WRITE (file_unit, '(5e13.5)', ADVANCE='yes') ABS(cvec(::step_wf_grid_plrn))**2
+            END IF
+            ! Calculate the center of polaron
+            ! TODO: not parallel, all the processors are doing the same calculations
+            DO nxx = 1, n_grid_super(1)
+               r_grid_vec(1:3) = (/nxx - 1, nyy - 1, nzz - 1/)
+               ctemp(1:3) = ctemp(1:3) + EXP(b_vec(1:3) * r_grid_vec(1:3)) * (cvec(nxx)**2)
+            END DO
+            ! End calculating the center of polaron
+         END DO
+      END DO
+
+      r_cry(1:3) = IMAG(LOG(ctemp(1:3)))/twopi
+      ! make crystal coordinates with 0 to 1
+      r_cry(1:3) = r_cry - FLOOR(r_cry)
+      r_cart(1:3) = zero
+      DO idir = 1, 3
+         r_cart(1:3) = r_cart(1:3) + r_cry(idir) * cell(1:3, idir)
+      END DO
+
+
+      IF(ionode) THEN
+         !WRITE(stdout, "(5x, 'The norm of polaron wavefunction:', 3e11.4)") rtemp
+         WRITE(stdout, "(5x, 'The position of polaron:')")
+         WRITE(stdout, "(5x, 3f9.4, ' in crystal coordinates')") r_cry(1:3)
+         WRITE(stdout, "(5x, 3f9.4, ' in Cartesian coordinates (Angstrom)')") r_cart(1:3)
+         WRITE (file_unit, '("END_DATAGRID_3D",/, "END_BLOCK_DATAGRID_3D")')
+         CLOSE(file_unit)
+         !JLB
+         WRITE(stdout, "(5x, '|\Psi(r)|^2 written to file.')")
+      END IF
+   !DO ipx = 1, nqf_p(1) !-nqf_p(3)/2, (nqf_p(3)+1)/2
+      !DO ipy = 1, nqf_p(2)!-nqf_p(3)/2, (nqf_p(3)+1)/2
+         !DO ipz = 1, nqf_p(3)!-nqf_p(3)/2, (nqf_p(3)+1)/2
+            !icount = icount + 1
+               !rpc(1:3) = ((/ipx - 1, ipy - 1, ipz - 1/) - nqf_p(1:3)/2) * n_grid(1:3) !- (n_grid_super(1:3))/2
+               !rpc(1:3) = ((/ipx, ipy, ipz/) - nqf_p(1:3)/2) * n_grid(1:3) !- (n_grid_super(1:3))/2
+                                 !DO iscx = -1, 1
+         !DO iscy = -1, 1
+            !DO iscz = -1, 1
+!         igx = nxx - rpc(1) + iscx * n_grid_super(1)
+!         igy = nyy - rpc(2) + iscy * n_grid_super(2)
+!         igz = nzz - rpc(3) + iscz * n_grid_super(3)
+      DEALLOCATE(wann_func, cvec, eigvec_wan)
+   END SUBROUTINE
+   !
+   !
+   !!JLB: write psir in transformed supercell
+   !!     xsf format no longer compatible, 
+   !!     as .cube files are written in primitive coords.
+   SUBROUTINE scell_write_real_space_wavefunction()
+
+      USE constants_epw, ONLY : zero, czero, cone, twopi, ci, bohr2ang
+      USE epwcom,        ONLY : nbndsub, step_wf_grid_plrn, as, scell_mat
+      USE io_var,        ONLY : iun_plot, iunRpscell, iunpsirscell
+      USE io_files,      ONLY : prefix
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE cell_base,     ONLY : at, alat
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE mp_world,      ONLY : world_comm
+      USE division,      ONLY : fkbounds
+      USE mp_global,     ONLY : inter_pool_comm
+      USE low_lvl,       ONLY : matinv3
+
+      IMPLICIT NONE
+
+      CHARACTER(LEN = 60) :: plrn_file
+      INTEGER :: nktotf_p, nkf1_p, nkf2_p, nkf3_p, nbnd_plrn_p, species(50)
+      INTEGER :: ibnd, indexkn1, ig_vec(1:3)
+      INTEGER :: ir1, ir2, ir3, iRp1, iRp2, n_grid_total
+      INTEGER :: ip_min, ip_max
+      INTEGER :: n_grid(3), grid_start(3), grid_end(3), n_grid_super(3), r_in_crys_p_sup(3)
+      INTEGER :: ishift, shift(3)
+      REAL(DP), ALLOCATABLE :: wann_func(:, :, :, :), rvec(:)
+      REAL(DP) :: r_in_crys_p(3), r_in_crys_s(3), r_in_cart(3), Rp_in_cart(3)
+      REAL(DP) :: p2s(3,3), s2p(3,3)
+      COMPLEX(DP) :: cvec
+      COMPLEX(DP), ALLOCATABLE :: eigvec_wan(:, :)
+
+      ! Broadcast supercell lattice vectors
+      CALL mp_bcast(as, meta_ionode_id, world_comm)
+
+      ! read Amp.plrn, save eigvec_wan
+      CALL read_plrn_wf(eigvec_wan, nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbnd_plrn_p, 'Amp.plrn', .true.)
+      ! read cube files for the real-space Wannier function Wm(r)
+      CALL read_wannier_cube(select_bands_plrn, wann_func, species, &
+         n_grid, grid_start, grid_end)
+      ! Read list of Rp-s within supercell
+      CALL read_Rp_in_S()
+      WRITE(stdout, '(a, i12)') "     Number of unit cells within supercell:", nRp
+
+      ! Open file
+      !plrn_file = 'psir_plrn.scell.dat'
+      plrn_file = 'psir_plrn.scell.csv'
+      IF (ionode) THEN
+        OPEN (UNIT=iunpsirscell, FILE=TRIM(plrn_file), FORM='formatted', STATUS='unknown')
+        !WRITE (iunpsirscell, '(a)') "# x , y , z (Angstrom), |\psi(r)|^2"
+        WRITE (iunpsirscell, '(a)') "x , y , z, |\psi(r)|^2"
+      END IF
+
+      ! Total number of grid points
+      n_grid_total = nRp*n_grid(1)*n_grid(2)*n_grid(3)
+      WRITE(stdout, '(a, i12)') "     Total grid points:", n_grid_total
+      WRITE(stdout, '(a, i12)') "     Step:", step_wf_grid_plrn
+
+      ! Parallelize iRp
+      CALL fkbounds(nRp, ip_min, ip_max)
+
+      ! Matrix to transform from primitive to supercell crystal coordinates
+      p2s = matinv3(TRANSPOSE(as))
+      p2s = MATMUL(p2s,at)
+      ! Supercell to primitive coordinates
+      s2p = matinv3(p2s)
+      !s2p = matinv3(REAL(scell_mat, DP))
+
+
+      ! Loop over all the grid points
+      DO iRp1 = 1, nRp
+         !
+         DO ir1 = 1, n_grid(1), step_wf_grid_plrn
+            DO ir2 = 1, n_grid(2), step_wf_grid_plrn
+               DO ir3 = 1, n_grid(3), step_wf_grid_plrn
+                  !
+                  r_in_crys_p(1:3) = (/ REAL(ir1-1, DP)/n_grid(1), REAL(ir2-1, DP)/n_grid(2), REAL(ir3-1, DP)/n_grid(3) /) &
+                                     + REAL(Rp(1:3, iRp1),DP)
+                  !
+                  ! Wannier functions stored in (1:ngrid*iRp) list
+                  !r_in_crys_p_sup(1:3) = n_grid(1:3) * &
+                  !                       ((/ REAL(ir1, DP)/n_grid(1), REAL(ir2, DP)/n_grid(2), REAL(ir3, DP)/n_grid(3) /) + Rp(1:3, iRp1))
+                  r_in_crys_p_sup(1:3) = (/ir1, ir2, ir3/) +  Rp(1:3, iRp1) * n_grid(1:3)
+                  !
+                  ! Move the r-point to the first supercell and store in cartesian coordinates for plotting
+                  r_in_crys_s = MATMUL(p2s, r_in_crys_p)
+                  r_in_crys_s = MODULO(r_in_crys_s, (/1.d0, 1.d0, 1.d0/))
+                  r_in_cart   = MATMUL(TRANSPOSE(as), r_in_crys_s) * alat * bohr2ang
+                  !
+                  ! Sum over p, PRB 99, 235139 Eq.(47)
+                  cvec = czero
+                  DO iRp2 = ip_min, ip_max !1, nRp
+                     !
+                     DO ishift = 1, 27
+                       !
+                       shift(1:3) = index_shift(ishift)
+                       ig_vec(1:3) = r_in_crys_p_sup(1:3) - Rp(1:3, iRp2) * n_grid(1:3) + MATMUL(s2p, shift(1:3)) * n_grid(1:3)
+                       !
+                       !ig_vec(1:3) = r_in_crys_p_sup(1:3) - Rp(1:3, iRp2) * n_grid(1:3)
+                       !
+                       IF(ALL(ig_vec(1:3) <= grid_end(1:3)) .and. &
+                          ALL(ig_vec(1:3) >= grid_start(1:3))) THEN
+                          !
+                          DO ibnd = 1, nbndsub ! sum over m
+                             !
+                             indexkn1 = (iRp2 - 1) * nbndsub + ibnd
+                             cvec = cvec + eigvec_wan(indexkn1, 1) * wann_func(ig_vec(1), ig_vec(2), ig_vec(3), ibnd)
+                             !
+                          END DO !ibnd
+                          !
+                       END IF
+                       !
+                     END DO ! ishift
+                     !
+                  END DO ! iRp2
+                  CALL mp_sum(cvec, inter_pool_comm)
+                  !
+                  ! Write |\psi(r)|^2 data point to file
+                  IF (ionode) THEN
+                     !WRITE (iunpsirscell, '(3f12.6, E13.5)') r_in_cart(1:3), ABS(cvec)**2
+                     WRITE (iunpsirscell, '(f12.6,", ", f12.6,", ", f12.6,", ", E13.5)') r_in_cart(1:3), ABS(cvec)**2
+                  END IF
+                  !
+               END DO ! ir3
+            END DO ! ir2
+         END DO ! ir1
+      END DO ! iRp1
+      !
+      IF (ionode) THEN
+        CLOSE (iunpsirscell)
+        WRITE(stdout, "(5x, '|\Psi(r)|^2 written to file.')")
+      END IF
+      DEALLOCATE(wann_func, eigvec_wan)
+      !
+   END SUBROUTINE
+   !
+   ! Read the nth Wannier function from prefix_0000n.cube file
+   SUBROUTINE read_wannier_cube(select_bands, wann_func, species, n_grid, grid_start_min, grid_end_max)
+      USE constants_epw, ONLY : zero, czero, cone
+      USE io_var,        ONLY : iun_plot
+      USE io_files,      ONLY : prefix
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE cell_base,     ONLY : at, alat
+      USE mp,            ONLY : mp_sum, mp_bcast
+      USE mp_world,      ONLY : world_comm
+      USE division,      ONLY : fkbounds
+      USE mp_global,     ONLY : inter_pool_comm
+      USE epwcom,        ONLY : nbndsub
+
+      IMPLICIT NONE
+
+      INTEGER,  INTENT(IN)  :: select_bands(:)
+      REAL(dp), ALLOCATABLE, INTENT(OUT) :: wann_func(:, :, :, :)
+      INTEGER,  INTENT(OUT) :: species(50)
+      INTEGER,  INTENT(OUT) :: n_grid(3), grid_start_min(3), grid_end_max(3)
+
+      INTEGER :: ibnd_index, ibnd, ie, idir, i_species, nbnd, iline, nAtoms
+      INTEGER :: nxx, nyy, nzz, n_len_z
+      INTEGER :: grid_start(3), grid_end(3)
+      REAL(dp) :: rtempvec(4), norm
+      CHARACTER(LEN = 60) :: wancube, temp_str
+
+      nbnd = SIZE(select_bands)
+      !print *, "nbnd", nbnd
+
+      ! find the max and min of real space grid of Wannier functions of all Wannier orbitals
+      IF(ionode) THEN
+         grid_start_min(:) = 100000
+         grid_end_max(:) = -100000
+         DO ibnd = 1, nbndsub
+            WRITE(wancube, "(a, '_', i5.5, '.cube')") TRIM(prefix), ibnd
+            OPEN(UNIT = iun_plot, FILE=TRIM(wancube), FORM='formatted', STATUS='unknown')
+            READ(iun_plot, *) temp_str !, temp_str, temp_str, temp_str, temp_str, temp_str, temp_str, temp_str
+            !print *, temp_str
+            READ(iun_plot, *) n_grid, grid_start, grid_end
+            DO idir = 1, 3
+               IF(grid_start_min(idir) >= grid_start(idir)) grid_start_min(idir) = grid_start(idir)
+               IF(grid_end_max(idir) <= grid_end(idir))   grid_end_max(idir) = grid_end(idir)
+            END DO
+            CLOSE(iun_plot)
+         END DO
+         !print *, "     n_grid, grid_start, grid_end", n_grid, grid_start_min, grid_end_max
+      END IF
+
+      CALL mp_bcast(n_grid,         meta_ionode_id, world_comm)
+      CALL mp_bcast(grid_start_min, meta_ionode_id, world_comm)
+      CALL mp_bcast(grid_end_max,   meta_ionode_id, world_comm)
+      !CALL mp_bcast(shift_start,    meta_ionode_id, world_comm)
+
+      ! Read the xth Wannier functions from prefix_0000x.cube in ionode
+      ! and broadcast to all nodes
+      ALLOCATE(wann_func(grid_start_min(1):grid_end_max(1), &
+         grid_start_min(2):grid_end_max(2), &
+         !grid_start_min(3):grid_end_max(3), nbnd))
+         grid_start_min(3):grid_end_max(3), nbndsub))
+      wann_func = zero
+      species = 0
+      IF(ionode) THEN
+         DO ibnd = 1, nbndsub
+            WRITE(wancube, "(a, '_', i5.5, '.cube')") TRIM(prefix), ibnd
+            OPEN(UNIT = iun_plot, FILE=TRIM(wancube), FORM='formatted', STATUS='unknown')
+            READ(iun_plot, *) temp_str
+            READ(iun_plot, *) n_grid, grid_start, grid_end
+            READ(iun_plot, *) nAtoms, rtempvec(1:3)
+
+            DO iline = 1, 3
+               READ(iun_plot, '(8A)') temp_str
+            END DO
+            !read(iun_plot, '(i, 8A)') i_species, temp_str
+            !species(1) = i_species
+            ie = 1
+            DO iline = 1, nAtoms
+               READ(iun_plot, '(i4, 4f13.5)') i_species, rtempvec
+               !print *, i_species
+               IF (iline == 1 ) THEN
+                  species(ie) = i_species
+                  ie = ie + 1
+               ELSE IF (species(ie - 1) /= i_species) THEN
+                  species(ie) = i_species
+                  ie = ie + 1
+               END IF
+            END DO
+            n_len_z = grid_end(3) - grid_start(3) + 1
+
+            DO nxx = grid_start(1), grid_end(1)
+              DO nyy = grid_start(2), grid_end(2)
+                DO nzz = grid_start(3), grid_end(3), 6
+                  IF (grid_end(3) - nzz < 6) THEN
+                     READ(iun_plot, *) wann_func(nxx, nyy, nzz:grid_end(3)-1, ibnd)
+                  ELSE
+                     READ(iun_plot, '(6E13.5)') wann_func(nxx, nyy, nzz:nzz+5, ibnd)
+                  END IF
+                ENDDO
+              ENDDO
+            ENDDO
+!            READ(iun_plot, '(6E13.5)') (((wann_func(nxx, nyy, nzz, ibnd),    &
+!               nzz = grid_start(3), grid_end(3)), &
+!               nyy = grid_start(2), grid_end(2)), &
+!               nxx = grid_start(1), grid_end(1))
+            CLOSE(iun_plot)
+            ! Wannier function is not well normalized
+            ! Normalize here will make the calculations with Wannier functions easier
+            norm = SUM(wann_func(:, :, :, ibnd) * wann_func(:, :, :, ibnd))
+            wann_func(:, :, :, ibnd) = wann_func(:, :, :, ibnd)/SQRT(norm)
+         END DO
+      END IF
+      CALL mp_bcast(wann_func, meta_ionode_id, world_comm)
+      CALL mp_bcast(species, meta_ionode_id, world_comm)
+      !
+   END SUBROUTINE
+   !
+   SUBROUTINE read_Rp_in_S()
+      ! JLB
+      !! Read list of Rp unit cell vectors contained on transformed supercell
+      !
+      USE io_var,        ONLY : iunRpscell
+      USE io_global,     ONLY : stdout, ionode, meta_ionode_id
+      USE mp,            ONLY : mp_bcast
+      USE mp_world,      ONLY : world_comm
+      USE elph2,         ONLY : nqtotf
+      !
+      IMPLICIT NONE
+      !
+      INTEGER :: iRp, ierr, nRp2
+      !
+      IF(ionode) THEN
+         OPEN(UNIT = iunRpscell, FILE='Rp.scell.plrn', FORM='formatted', STATUS='unknown')
+         READ(iunRpscell, *) nRp
+         IF (nRp .ne. nqtotf) CALL errore('read_Rp_in_S', 'nRp and nqtotf are not the same!',1)
+         CLOSE(UNIT = iunRpscell)
+      END IF
+      CALL mp_bcast(nRp, meta_ionode_id, world_comm)
+      ALLOCATE(Rp(3, nRp), STAT = ierr)
+      IF (ierr /= 0) CALL errore('read_Rp_in_S', 'Error allocating Rp', 1)
+      Rp = 0
+      IF(ionode) THEN
+         OPEN(UNIT = iunRpscell, FILE='Rp.scell.plrn', FORM='formatted', STATUS='unknown')
+         READ(iunRpscell, *) nRp2
+         DO iRp=1,nRp
+            READ(iunRpscell, *) Rp(1:3, iRp)
+         END DO
+         CLOSE(UNIT = iunRpscell)
+      END IF
+      CALL mp_bcast(Rp, meta_ionode_id, world_comm)
+      !
+   END SUBROUTINE
+   !
+   FUNCTION index_Rp(iRp, nqfs)
+      USE epwcom, ONLY: nqf1, nqf2, nqf3
+
+      IMPLICIT NONE
+      INTEGER, INTENT(IN) :: iRp
+      INTEGER  :: index_Rp(1:3), nqf_c(1:3)
+      INTEGER, INTENT(IN), OPTIONAL  :: nqfs(1:3)
+
+
+      IF(PRESENT(nqfs)) THEN
+         nqf_c(1:3) = nqfs(1:3)
+      ELSE
+         nqf_c(1:3) = (/nqf1, nqf2, nqf3/)
+      END IF
+
+      index_Rp(1) = (iRp - 1)/(nqf_c(2) * nqf_c(3))
+      index_Rp(2) = MOD(iRp - 1, nqf_c(2) * nqf_c(3))/nqf_c(3)
+      index_Rp(3) = MOD(iRp - 1, nqf_c(3))
+
+      IF(ANY(index_Rp < 0) .or. ANY(index_Rp >= nqf_c)) THEN
+         CALL errore('index_Rp','index_Rp not correct!',1)
+      END IF
+   END FUNCTION
+   !
+   FUNCTION index_xp(delta_p)
+      USE elph2, ONLY : nqtotf
+      USE epwcom, ONLY : nqf1, nqf2, nqf3
+      IMPLICIT NONE
+
+      INTEGER, INTENT(IN)  :: delta_p(1:3)
+      INTEGER :: index_xp
+
+      index_xp = delta_p(1) * nqf2 * nqf3 + delta_p(2) * nqf3 + delta_p(3) + 1
+
+      IF(.NOT. ALL(index_Rp(index_xp) == delta_p(1:3))) THEN
+         CALL errore('index_xp', 'index_Rp not correct!', 1)
+      END IF
+   END FUNCTION
+   !
+   FUNCTION index_shift(ishift)
+      IMPLICIT NONE
+      INTEGER, INTENT(IN) :: ishift
+      INTEGER  :: index_shift(1:3)
+
+      index_shift(1) = (ishift - 1)/9 - 1
+      index_shift(2) = MOD(ishift - 1, 9)/3 - 1
+      index_shift(3) = MOD(ishift - 1, 3) - 1
+
+      IF(ANY(index_shift < -1) .or. ANY(index_shift > 1)) THEN
+         CALL errore('index_shift', 'index_shift not correct!', 1)
+      END IF
+   END FUNCTION
+
+END MODULE
diff --git a/EPW/src/pw2wan2epw.f90 b/EPW/src/pw2wan2epw.f90
index 5cb85877b..7ed9c6b52 100644
--- a/EPW/src/pw2wan2epw.f90
+++ b/EPW/src/pw2wan2epw.f90
@@ -3730,8 +3730,12 @@
       OPEN(UNIT = iun_plot, FILE=TRIM(wancube), FORM='formatted', STATUS='unknown')
       CALL date_and_tim(cdate, ctime )
       ! First two lines are comments
-      WRITE(iun_plot, *) '     Generated by EPW code'
-      WRITE(iun_plot, *) '     On ', cdate, ' at ', ctime
+!!!!!!
+      !WRITE(iun_plot, *) '     Generated by EPW code'
+      !WRITE(iun_plot, *) '     On ', cdate, ' at ', ctime
+      WRITE(iun_plot, *) '     Generated by EPW code     On ', cdate, ' at ', ctime
+      WRITE(iun_plot, '(9I4)') ngx, ngy, ngz, istart(1:3), iend(1:3)
+!!!!!!
       ! Number of atoms, origin of cube (Cartesians) wrt simulation (home) cell
       WRITE(iun_plot, '(i4,3f13.5)') icount, orig(1), orig(2), orig(3)
       !
diff --git a/EPW/src/test_tools.f90 b/EPW/src/test_tools.f90
deleted file mode 100644
index bc7fe2e2c..000000000
--- a/EPW/src/test_tools.f90
+++ /dev/null
@@ -1,201 +0,0 @@
-    module test_tools
-        USE kinds,     only : DP
-    INTERFACE para_write
-        MODULE PROCEDURE &
-            para_write_i, para_write_i1, para_write_i2, para_write_i3, para_write_i4, para_write_i5, para_write_i6, &
-            para_write_r, para_write_r1, para_write_r2, para_write_r3, para_write_r4, para_write_r5, para_write_r6,&
-            para_write_c, para_write_c1, para_write_c2, para_write_c3, para_write_c4, para_write_c5, para_write_c6
-    END INTERFACE
-    contains
-        SUBROUTINE para_write_i(A, filename)
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A
-        CHARACTER(LEN=*), INTENT(IN) :: filename
-    END SUBROUTINE
-    SUBROUTINE para_write_i1(A, varName)
-        USE mp_world,      ONLY : mpime
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A(:)
-        CHARACTER(LEN=*), INTENT(IN) :: varName
-        CHARACTER(LEN=256) :: filename
-        CHARACTER(LEN=256) :: lineLen
-
-        INTEGER :: bounds(1), i, j
-
-        bounds = SHAPE(A)
-        WRITE(filename,'(a, i0)') varName, mpime
-        OPEN(UNIT=23747806, FILE='./test_out/'//filename, POSITION="append", FORM='formatted')
-        WRITE(23747806, *) REPEAT('-',10), bounds
-        WRITE(23747806, '(I5)') (A(j), j=1, bounds(1))
-        !WRITE(lineLen,'("(",i0,"E12.4)")') bounds(1) ! x2 due to complex
-        !do i = 1, bounds(2)
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")') (A(j,i), j=1, bounds(1))
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")', advance="no") A(:,i)
-        !WRITE(23747806, lineLen) (A(j,i), j=1, bounds(1))
-        !end do
-        CLOSE(23747806)
-    END SUBROUTINE
-    SUBROUTINE para_write_i2(A)
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A(:,:)
-    END SUBROUTINE
-    SUBROUTINE para_write_i3(A)
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A(:,:,:)
-    END SUBROUTINE
-    SUBROUTINE para_write_i4(A)
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A(:,:,:,:)
-
-    END SUBROUTINE
-    SUBROUTINE para_write_i5(A)
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A(:,:,:,:,:)
-
-    END SUBROUTINE
-    SUBROUTINE para_write_i6(A)
-        IMPLICIT NONE
-        INTEGER, INTENT(IN) :: A(:,:,:,:,:,:)
-    END SUBROUTINE
-
-    SUBROUTINE para_write_r(A)
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A
-    END SUBROUTINE
-    SUBROUTINE para_write_r1(A, varName)
-        USE mp_world,      ONLY : mpime
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A(:)
-        CHARACTER(LEN=*), INTENT(IN) :: varName
-        CHARACTER(LEN=256) :: filename
-        CHARACTER(LEN=256) :: lineLen
-
-        INTEGER :: bounds(1), i, j
-
-        bounds = SHAPE(A)
-        WRITE(filename,'(a, i0)') varName, mpime
-        OPEN(UNIT=23747806, FILE='./test_out/'//filename, POSITION="append", FORM='formatted')
-        WRITE(23747806, *) REPEAT('-',10), bounds(1), 1
-        WRITE(23747806, '(E12.4)') (A(j), j=1, bounds(1))
-        !WRITE(lineLen,'("(",i0,"E12.4)")') bounds(1) ! x2 due to complex
-        !do i = 1, bounds(2)
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")') (A(j,i), j=1, bounds(1))
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")', advance="no") A(:,i)
-        !WRITE(23747806, lineLen) (A(j,i), j=1, bounds(1))
-        !end do
-        CLOSE(23747806)
-    END SUBROUTINE
-    SUBROUTINE para_write_r2(A, varName)
-        USE mp_world,      ONLY : mpime
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A(:,:)
-        CHARACTER(LEN=*), INTENT(IN) :: varName
-        CHARACTER(LEN=256) :: filename
-        CHARACTER(LEN=256) :: lineLen
-
-        INTEGER :: bounds(2), i, j
-
-        bounds = SHAPE(A)
-        WRITE(filename,'(a, i0)') varName, mpime
-        OPEN(UNIT=23747806, FILE='./test_out/'//filename, POSITION="append", FORM='formatted')
-        WRITE(23747806, *) REPEAT('-',10), bounds
-        WRITE(23747806, '(E12.4)') ((A(j,i), j=1, bounds(1)), i=1, bounds(2))
-        !WRITE(lineLen,'("(",i0,"E12.4)")') bounds(1) ! x2 due to complex
-        !do i = 1, bounds(2)
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")') (A(j,i), j=1, bounds(1))
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")', advance="no") A(:,i)
-        !WRITE(23747806, lineLen) (A(j,i), j=1, bounds(1))
-        !end do
-        CLOSE(23747806)
-    END SUBROUTINE
-    SUBROUTINE para_write_r3(A)
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A(:,:,:)
-    END SUBROUTINE
-    SUBROUTINE para_write_r4(A)
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A(:,:,:,:)
-
-    END SUBROUTINE
-    SUBROUTINE para_write_r5(A)
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A(:,:,:,:,:)
-
-    END SUBROUTINE
-    SUBROUTINE para_write_r6(A)
-        IMPLICIT NONE
-        REAL(dp), INTENT(IN) :: A(:,:,:,:,:,:)
-    END SUBROUTINE
-
-    SUBROUTINE para_write_c(A)
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A
-    END SUBROUTINE
-    SUBROUTINE para_write_c1(A, varName)
-        USE mp_world,      ONLY : mpime
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A(:)
-        CHARACTER(LEN=*), INTENT(IN) :: varName
-        CHARACTER(LEN=256) :: filename
-        CHARACTER(LEN=256) :: lineLen
-
-        INTEGER :: bounds(1), i, j
-
-        bounds = SHAPE(A)
-        WRITE(filename,'(a, i0)') varName, mpime
-        OPEN(UNIT=23747806, FILE='./test_out/'//filename, POSITION="append", FORM='formatted')
-        WRITE(23747806, *) REPEAT('-',10), bounds(1), 1
-        WRITE(23747806, '("(",E12.4,"+",E12.4,"j",")")') (A(j), j=1, bounds(1))
-        !WRITE(lineLen,'("(",i0,"E12.4)")') bounds(1) ! x2 due to complex
-        !do i = 1, bounds(2)
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")') (A(j,i), j=1, bounds(1))
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")', advance="no") A(:,i)
-        !WRITE(23747806, lineLen) (A(j,i), j=1, bounds(1))
-        !end do
-        CLOSE(23747806)
-    END SUBROUTINE
-    SUBROUTINE para_write_c2(A, varName)
-        USE mp_world,      ONLY : mpime
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A(:,:)
-        CHARACTER(LEN=*), INTENT(IN) :: varName
-        CHARACTER(LEN=256) :: filename
-        CHARACTER(LEN=256) :: lineLen
-
-        INTEGER :: bounds(2), i, j
-
-        bounds = SHAPE(A)
-        WRITE(filename,'(a, i0)') varName, mpime
-        OPEN(UNIT=23747806, FILE='./test_out/'//filename, POSITION="append", FORM='formatted')
-        WRITE(23747806, *) REPEAT('-',10), bounds
-        WRITE(23747806, '("(",E12.4,"+",E12.4,"j",")")') ((A(j,i), j=1, bounds(1)), i=1, bounds(2))
-        !WRITE(lineLen,'("(",i0,"E12.4)")') bounds(1) ! x2 due to complex
-        !do i = 1, bounds(2)
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")') (A(j,i), j=1, bounds(1))
-        !WRITE(23747806, '(E12.4,"+",E12.4,"j")', advance="no") A(:,i)
-        !WRITE(23747806, lineLen) (A(j,i), j=1, bounds(1))
-        !end do
-        CLOSE(23747806)
-    END SUBROUTINE
-    SUBROUTINE para_write_c3(A)
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A(:,:,:)
-    END SUBROUTINE
-    SUBROUTINE para_write_c4(A)
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A(:,:,:,:)
-
-    END SUBROUTINE
-    SUBROUTINE para_write_c5(A)
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A(:,:,:,:,:)
-
-    END SUBROUTINE
-    SUBROUTINE para_write_c6(A)
-        IMPLICIT NONE
-        COMPLEX(dp), INTENT(IN) :: A(:,:,:,:,:,:)
-    END SUBROUTINE
-    end module
-
-
-
diff --git a/EPW/src/utilities.f90 b/EPW/src/utilities.f90
index 1719311ce..f4566611d 100644
--- a/EPW/src/utilities.f90
+++ b/EPW/src/utilities.f90
@@ -1434,7 +1434,6 @@
     USE pwcom,         ONLY : ef
     USE mp,            ONLY : mp_max, mp_min
     USE mp_global,     ONLY : inter_pool_comm
-    USE epwcom,        ONLY : wfcelec
     USE constants_epw, ONLY : ryd2ev
     !
     IMPLICIT NONE
@@ -1470,20 +1469,6 @@
         !
       ENDDO
     ENDDO
-    IF (wfcelec) then
-      DO ik = 1, nkqf
-        DO ibnd = 1, nbndsub
-          ebnd = etf(ibnd, ik)
-          !
-          IF (ebnd < fsthick + ef .AND. ebnd > ef) THEN
-            ibndmin = MIN(ibnd, ibndmin)
-            ibndmax = MAX(ibnd, ibndmax)
-            ebndmin = MIN(ebnd, ebndmin)
-            ebndmax = MAX(ebnd, ebndmax)
-          ENDIF
-        ENDDO
-      ENDDO
-    ENDIF
     !
     tmp = DBLE(ibndmin)
     CALL mp_min(tmp, inter_pool_comm)
diff --git a/EPW/src/wan2bloch.f90 b/EPW/src/wan2bloch.f90
index 1db31f3f6..8483dc2c5 100644
--- a/EPW/src/wan2bloch.f90
+++ b/EPW/src/wan2bloch.f90
@@ -17,7 +17,10 @@
   CONTAINS
     !
     !--------------------------------------------------------------------------
-    SUBROUTINE hamwan2bloch(nbnd, nrr, cuf, eig, chw, cfac, dims)
+!!!!!
+   !SUBROUTINE hamwan2bloch(nbnd, nrr, cuf, eig, chw, cfac, dims)
+    SUBROUTINE hamwan2bloch(nbnd, nrr, cuf, eig, chw, cfac, dims, is_mirror)
+!!!!!
     !--------------------------------------------------------------------------
     !!
     !!  From the Hamiltonian in Wannier representation, find the corresponding
@@ -31,7 +34,10 @@
     !!  output : rotation matrix cuf(nbnd, nbnd)
     !!           interpolated hamiltonian eigenvalues eig(nbnd)
     !!
-    !!  2019: Weng Hong Sio and SP: Lifting of degeneracies.
+    !!  2021: CL : Replace the random perturbation matrix with prime number matrix
+    !!        in Lifting of degeneracies; control tag : lphase
+    !!  2021: CL : Rotate the the largest element in eigenvector to real axis. (lrot)
+    !!  2019: Weng Hong Sio and SP: Lifting of degeneracies. control tag: lrot
     !!        P_prime = U^dag P U where P is a random perturbation matrix
     !!        cuf = (eigvector of P_prime) * U
     !!        P_prime spans the degenenrate subspace.
@@ -43,6 +49,9 @@
     USE constants_epw, ONLY : czero, cone, zero, one, eps12, eps16
     USE epwcom,        ONLY : use_ws
     USE low_lvl,       ONLY : utility_zdotu, degen_sort
+!!!!!
+    USE epwcom,        ONLY : debug_plrn, lphase, lrot
+!!!!!
     !
     IMPLICIT NONE
     !
@@ -62,6 +71,10 @@
     !! Hamiltonian in Wannier basis
     COMPLEX(KIND = DP), INTENT(out) :: cuf(nbnd, nbnd)
     !! Rotation matrix U^\dagger, fine mesh
+!!!!!
+    LOGICAL, INTENT(IN), OPTIONAL :: is_mirror
+    !! .true. if k-point is a time-reversal invariant point
+!!!!!
     !
     ! Local variables
     LOGICAL :: duplicates
@@ -114,10 +127,22 @@
     !! Perturbation matrix made of small complex random number on the full space
     COMPLEX(KIND = DP), ALLOCATABLE :: cwork(:)
     !! Complex work variable
-    COMPLEX(KIND = DP), ALLOCATABLE :: P_prime(:, :)
+!!!!!
+    ! COMPLEX(KIND = DP), ALLOCATABLE :: P_prime(:, :)
+    REAL(KIND = DP), ALLOCATABLE :: P_prime(:, :)
+!!!!!
     !! Perturbation matrix on the subspace
     COMPLEX(KIND = DP), ALLOCATABLE :: Uk(:, :)
     !! Rotation matrix on the full space
+!!!!!
+    INTEGER :: ibnd_max(1)
+    !! Index of the maximum element
+    REAL(KIND = DP) :: norm_vec(nbnd)
+    !! Real Hamiltonian matrix in Bloch basis for TRI k points
+    COMPLEX(KIND = DP) :: cfac2(nrr, dims, dims)
+    !! cfac for the time-reversial point if pointed
+    COMPLEX(KIND = DP) :: fac_max
+!!!!!
     !
     CALL start_clock('HamW2B')
     !----------------------------------------------------------
@@ -133,17 +158,31 @@
     !  H~(k'+q') is chf( nbnd, nbnd, 2*ik   )
     !
     chf(:, :) = czero
+!!!!!!!
+    ! Calculate the -k if this is a mirror point
+    ! Do nothing if not specified
+    cfac2 = cfac
+    IF (PRESENT(is_mirror)) THEN
+        IF(is_mirror) cfac2 = CONJG(cfac)
+    END IF
+!!!!!!!
     !
     IF (use_ws) THEN
       DO iw = 1, dims
         DO iw2 = 1, dims
           DO ir = 1, nrr
-            chf(iw, iw2) = chf(iw, iw2) + chw(iw, iw2, ir) * cfac(ir, iw, iw2)
+!!!!!!!
+           !chf(iw, iw2) = chf(iw, iw2) + chw(iw, iw2, ir) * cfac(ir, iw, iw2)
+            chf(iw, iw2) = chf(iw, iw2) + chw(iw, iw2, ir) * cfac2(ir, iw, iw2)
+!!!!!!!
           ENDDO
         ENDDO
       ENDDO
     ELSE
-      CALL ZGEMV('n', nbnd**2, nrr, cone, chw, nbnd**2, cfac(:, 1, 1), 1, cone, chf, 1)
+!!!!!!!
+     !CALL ZGEMV('n', nbnd**2, nrr, cone, chw, nbnd**2, cfac(:, 1, 1), 1, cone, chf, 1)
+      CALL ZGEMV('n', nbnd**2, nrr, cone, chw, nbnd**2, cfac2(:, 1, 1), 1, cone, chf, 1)
+!!!!!!!
     ENDIF
     !
     !---------------------------------------------------------------------
@@ -199,24 +238,27 @@
       degen_group(2, ig) = degen_group(1, ig) + degen_group(2, ig) -1
     ENDDO
     !
+!!!!!!!
+    ! This old random matrix generation is removed
     ! Generate a pertubation matrix of size (nbnd x nbnd) made of random number
     !CALL init_random_seed()
     ! SP: Using random_number does not work because the perturbation needs to be the
     !     same when calling hamwan2bloch at k and k+q (see ephwann_shuffle).
     !     Therefore I fix a "random" number 0.25644832 + 0.01 * ibnd and 0.11584272 + 0.025 * jbnd
-    P(:, :) = czero
-    DO ibnd = 1, nbnd
-      DO jbnd = 1, nbnd
-        !CALL random_number(rand1)
-        !CALL random_number(rand2)
-        rand1 = 0.25644832 + 0.01 * ibnd
-        rand2 = 0.11584272 + 0.025 * jbnd
-        P(jbnd, ibnd) = CMPLX(rand1, rand2, KIND = DP)
-      ENDDO
-    ENDDO
+    !P(:, :) = czero
+    !DO ibnd = 1, nbnd
+    !  DO jbnd = 1, nbnd
+    !    !CALL random_number(rand1)
+    !    !CALL random_number(rand2)
+    !    rand1 = 0.25644832 + 0.01 * ibnd
+    !    rand2 = 0.11584272 + 0.025 * jbnd
+    !    P(jbnd, ibnd) = CMPLX(rand1, rand2, KIND = DP)
+    !  ENDDO
+    !dENDDO
     !
     ! Hermitize the Perturbation matrix and make it small
-    P = 0.5d0 * (P + TRANSPOSE(CONJG(P))) * ABS(MINVAL(w)) * 0.1d0
+    !P = 0.5d0 * (P + TRANSPOSE(CONJG(P))) * ABS(MINVAL(w)) * 0.1d0
+!!!!!!!
     !
     DO ig = 1, ndeg
       starting = degen_group(1, ig)
@@ -224,7 +266,10 @@
       ! Size of the degenerate subspace
       length   = ending - starting + 1
       !
-      ALLOCATE(rwork(1 + 5 * length + 2 * length**2), STAT = ierr)
+!!!!!!!
+      !ALLOCATE(rwork(1 + 5 * length + 2 * length**2), STAT = ierr)
+      ALLOCATE(rwork(length**2 + 2 * length), STAT = ierr)
+!!!!!!!
       IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating rwork', 1)
       ALLOCATE(iwork(3 + 5 * length), STAT = ierr)
       IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating iwork', 1)
@@ -238,14 +283,22 @@
       IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating wp', 1)
       !
       Uk(:, :) = cz(:, starting:ending)
-      P_prime = MATMUL(TRANSPOSE(CONJG(Uk)), MATMUL(P, Uk))
+      ! Create a matrix filled with prime numbers
+!!!!!!
+      ! P_prime = MATMUL(TRANSPOSE(CONJG(Uk)), MATMUL(P, Uk))
+      CALL prime_number_matrix(P_prime, length)
+!!!!!!
       ! Diagonalization of P_prime
-      CALL ZHEEVD('V', 'L', length, P_prime, length, wp, cwork, &
-                2 * length + length**2, rwork, 1 + 5 * length + 2 * length**2, &
-                iwork, 3 + 5 * length, info)
+!!!!!!
+!      CALL ZHEEVD('V', 'L', length, P_prime, length, wp, cwork, &
+!                2 * length + length**2, rwork, 1 + 5 * length + 2 * length**2, &
+!                iwork, 3 + 5 * length, info)
+      CALL DSYEV('V', 'L', length, P_prime, length, wp, rwork, &
+                length**2 + 2 * length, info)
+!!!!!!
       ! On exiting P_prime is the eigenvector of the P_prime matrix and wp the eigenvector.
       !
-      cz(:, starting:ending) = MATMUL(Uk, P_prime)
+      IF (lphase) cz(:, starting:ending) = MATMUL(Uk, P_prime)
       !
       DEALLOCATE(rwork, STAT = ierr)
       IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating rwork', 1)
@@ -261,11 +314,23 @@
       IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating wp', 1)
     ENDDO ! ig
     !
+!!!!!!
+    ! Find the largest element and set it to pure real
+    IF(lrot) THEN
+       DO ibnd = 1, nbnd
+         DO jbnd = 1, nbnd
+            norm_vec(jbnd) = ABS(cz(jbnd, ibnd))
+         END DO
+         ibnd_max(:) = MAXLOC(norm_vec(1:nbnd))
+         fac_max = cz(ibnd_max(1), ibnd) / norm_vec(ibnd_max(1))
+         cz(1:nbnd, ibnd) = cz(1:nbnd, ibnd) * CONJG(fac_max)
+       END DO
+    END IF
+!!!!!!
     DO jbnd = 1, nbnd
       INNER : DO ibnd = 1, nbnd
         IF (ABS(cz(ibnd, jbnd)) > eps12) THEN
           cz(:, jbnd) = cz(:, jbnd) * CONJG(cz(ibnd, jbnd))
-          !cz(:, jbnd) = cz(:, jbnd) / SQRT(zdotu(nbnd, CONJG(cz(:, jbnd)), 1, cz(:, jbnd), 1))
           cz(:, jbnd) = cz(:, jbnd) / SQRT(utility_zdotu(CONJG(cz(:, jbnd)), cz(:, jbnd)))
           EXIT INNER
         ENDIF
@@ -277,6 +342,12 @@
     !
     cuf = CONJG(TRANSPOSE(cz))
     eig = w
+!!!!!!
+    ! Do the conjugate on eigenvector
+    IF (PRESENT(is_mirror)) THEN
+      IF(is_mirror) cuf = TRANSPOSE(cz)
+    END IF
+!!!!!!
     !
     CALL stop_clock('HamW2B')
     !
@@ -284,8 +355,49 @@
     END SUBROUTINE hamwan2bloch
     !--------------------------------------------------------------------------
     !
+!!!!!!
     !--------------------------------------------------------------------------
-    SUBROUTINE dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, cuf, eig)
+    SUBROUTINE prime_number_matrix(A, n)
+    !--------------------------------------------------------------------------
+      !!
+      !! Generating a n x n matrix A filled with prime numbers
+      !! For example, if n = 4, A =
+      !! |2   3  5  7|
+      !! |11 13 17 19|
+      !! |23 29 31 37|
+      !! |41 43 47 53|
+      !! Used to perturb the degenerate eigenstates
+      !! 2021 Chao Lian
+      !
+      USE kinds,         ONLY : DP
+      IMPLICIT NONE
+      REAL(dp), INTENT(OUT) :: A(:, :)
+      INTEGER, INTENT(IN) :: n
+      INTEGER :: prime_numbers(60) = (/2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, &
+         & 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, &
+         & 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281/)
+
+      INTEGER :: i, j, k
+      k = 0
+      DO i = 1, n
+         DO j = i, n
+            k = k + 1
+            A(i, j) = REAL(prime_numbers(k), dp)
+         END DO
+      END DO
+
+      DO i = 1, n
+         DO j = 1, i
+            A(i, j) = (A(j, i))
+         END DO
+      END DO
+
+    END SUBROUTINE
+    !
+    !--------------------------------------------------------------------------
+    !SUBROUTINE dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, cuf, eig)
+    SUBROUTINE dynwan2bloch(nmodes, nrr_q, irvec_q, ndegen_q, xxq, cuf, eig, is_mirror)
+!!!!!!
     !--------------------------------------------------------------------------
     !!
     !! From the Hamiltonian in Wannier representation, find the corresponding
@@ -296,16 +408,23 @@
     !! required to obtain proper phonon dispersion interpolation
     !! and corresponds to the reality of the interatomic force constants
     !!
+    !!  2021: CL : Lifting of degeneracies using random perturbation matrix
+    !!             with prime number matrix control tag : lphase
+    !!             Rotate the the largest element in eigenvector to real axis. (lrot)
     !
     USE kinds,         ONLY : DP
     USE cell_base,     ONLY : at, bg
     USE ions_base,     ONLY : amass, tau, nat, ityp
     USE elph2,         ONLY : rdw, epsi, zstar, qrpl
-    USE epwcom,        ONLY : lpolar, lphase, use_ws, nqc1, nqc2, nqc3
+    USE epwcom,        ONLY : lpolar, lphase, lrot, use_ws, nqc1, nqc2, nqc3
+    USE epwcom,        ONLY : debug_plrn
     USE constants_epw, ONLY : twopi, ci, czero, zero, one, eps12
     USE rigid,         ONLY : cdiagh2
     USE low_lvl,       ONLY : utility_zdotu
     USE rigid_epw,     ONLY : rgd_blk
+!!!!!!
+    USE low_lvl,       ONLY : degen_sort
+!!!!!!
     !
     IMPLICIT NONE
     !
@@ -323,6 +442,10 @@
     !! interpolated dynamical matrix eigenvalues for this kpoint
     COMPLEX(KIND = DP), INTENT(out) :: cuf(nmodes, nmodes)
     !! Rotation matrix, fine mesh
+!!!!!!
+    LOGICAL, INTENT(IN), OPTIONAL :: is_mirror
+    !! .true. if q-point is a the mirror point of some original point
+!!!!!!
     !
     ! Local variables
     INTEGER :: imode
@@ -351,6 +474,55 @@
     ! Dynamical matrix in Bloch basis, fine mesh
     COMPLEX(KIND = DP) :: cfac
     !! Complex prefactor for Fourier transform.
+!!!!!!
+    INTEGER, ALLOCATABLE :: degen_group(:, :)
+    !! Index of degenerate subspace
+    INTEGER :: ierr
+    !! Error status
+    INTEGER :: list_dup(nmodes)
+    !! List of degenerate eigenvalues
+    INTEGER :: ndeg
+    !! Number of degeneracies
+    LOGICAL :: duplicates
+    !! Returns if the bands contains degeneracices for that k-point.
+    INTEGER :: ig
+    !! Counter on real-space index
+    !JLB
+    INTEGER :: info
+    !! "0" successful exit, "<0" i-th argument had an illegal value, ">0" i eigenvectors failed to converge.
+    REAL(KIND = DP) :: rwork_tri(3*nmodes)
+    !! Real work array for TRI q case
+    REAL(KIND = DP) :: rchf(nmodes, nmodes), norm_vec(nmodes)
+    !! Real Dynamical matrix in Bloch basis for TRI q points
+    INTEGER, ALLOCATABLE :: iwork(:)
+    !! IWORK(1) returns the optimal LIWORK.
+    REAL(KIND = DP) :: rand1
+    !! Random number
+    REAL(KIND = DP) :: rand2
+    !! Random number
+    REAL(KIND = DP), ALLOCATABLE :: wp(:)
+    !! Perturbed eigenvalues on the degenerate subspace
+    REAL(KIND = DP), ALLOCATABLE :: rwork(:)
+    !! RWORK(1) returns the optimal LRWORK.
+    COMPLEX(KIND = DP) :: P(nmodes, nmodes)
+    !! Perturbation matrix made of small complex random number on the full space
+    COMPLEX(KIND = DP) :: fac_max(1)
+    !!
+    COMPLEX(KIND = DP), ALLOCATABLE :: cwork(:)
+    !! Complex work variable
+    REAL(KIND = DP), ALLOCATABLE :: P_prime(:, :)
+    !! Perturbation matrix on the subspace
+    COMPLEX(KIND = DP), ALLOCATABLE :: Uk(:, :)
+    !! Rotation matrix on the full space
+    INTEGER :: starting
+    !! Starting position
+    INTEGER :: ending
+    !! Ending position
+    INTEGER :: length
+    !! Size of the degenerate subspace
+    INTEGER :: imode_max(1)
+    !! Size of the degenerate subspace
+!!!!!!
     !
     CALL start_clock ('DynW2B')
     !----------------------------------------------------------
@@ -433,6 +605,78 @@
     !             0, 0, -one, neig, w, cz, nmodes, cwork, &
     !             rwork, iwork, ifail, info)
     CALL cdiagh2(nmodes, chf, nmodes, w, cz)
+!!!!!!
+    ! Find the degenerate eigenvalues w
+    CALL degen_sort(w, SIZE(w), duplicates, list_dup)
+    !
+    ndeg = MAXVAL(list_dup)
+    ALLOCATE(degen_group(2, ndeg), STAT = ierr)
+    IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating degen_group', 1)
+    degen_group(:, :) = 0
+    !
+    ! degen_group contains the starting and ending position of each group
+    ! degen_group(1,1) = starting position of group 1
+    ! degen_group(2,1) = ending position of group 1
+    ! degen_group(1,2) = starting position of group 2 ...
+    DO ig = 1, ndeg
+      degen_group(2, ig) = 0
+      DO jmode = 1, nmodes
+        IF (list_dup(jmode) == ig) THEN
+          IF (jmode == 1) THEN
+            degen_group(1, ig) = jmode
+          ELSE
+            IF (list_dup(jmode) - list_dup(jmode - 1) /= 0) degen_group(1, ig) = jmode
+          ENDIF
+          degen_group(2, ig) = degen_group(2, ig) + 1
+        ENDIF
+      ENDDO
+      degen_group(2, ig) = degen_group(1, ig) + degen_group(2, ig) -1
+    ENDDO
+    !
+    ! Generate a pertubation matrix of size (nbnd x nbnd) made of random number
+    !
+    DO ig = 1, ndeg
+      starting = degen_group(1, ig)
+      ending   = degen_group(2, ig)
+      ! Size of the degenerate subspace
+      length   = ending - starting + 1
+      !
+      ALLOCATE(rwork(length**2 + 2 * length), STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating rwork', 1)
+      ALLOCATE(iwork(3 + 5 * length), STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating iwork', 1)
+      ALLOCATE(cwork(length**2 + 2 * length), STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating cwork', 1)
+      ALLOCATE(Uk(nmodes, length), STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating Uk', 1)
+      ALLOCATE(P_prime(length, length), STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating P_prime', 1)
+      ALLOCATE(wp(length), STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error allocating wp', 1)
+      !
+      Uk(:, 1:length) = cz(:, starting:ending)
+      CALL prime_number_matrix(P_prime, length)
+      ! Diagonalization of P_prime
+      CALL DSYEV('V', 'L', length, P_prime, length, wp, rwork, &
+                length**2 + 2 * length, info)
+      ! On exiting P_prime is the eigenvector of the P_prime matrix and wp the eigenvector.
+      !
+      IF(lphase) cz(:, starting:ending) = MATMUL(Uk, P_prime)
+      !
+      DEALLOCATE(rwork, STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating rwork', 1)
+      DEALLOCATE(iwork, STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating iwork', 1)
+      DEALLOCATE(cwork, STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating cwork', 1)
+      DEALLOCATE(Uk, STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating Uk', 1)
+      DEALLOCATE(P_prime, STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating P_prime', 1)
+      DEALLOCATE(wp, STAT = ierr)
+      IF (ierr /= 0) CALL errore('hamwan2bloch', 'Error deallocating wp', 1)
+    ENDDO ! ig
+!!!!!!
     !
     ! clean noise
     DO jmode = 1,nmodes
@@ -440,6 +684,19 @@
         IF (ABS(cz(imode, jmode)) < eps12) cz(imode, jmode) = czero
       ENDDO
     ENDDO
+!!!!!!
+    ! Find the largest element and set it to pure real
+    IF(lrot) THEN
+       DO imode = 1, nmodes
+         DO jmode = 1, nmodes
+            norm_vec(jmode) = ABS(cz(jmode, imode))
+         END DO
+         imode_max(:) = MAXLOC(norm_vec(1:nmodes))
+         fac_max(1) = cz(imode_max(1), imode)/norm_vec(imode_max(1))
+         cz(1:nmodes, imode) = cz(1:nmodes, imode) * CONJG(fac_max(1))
+       END DO
+     END IF
+!!!!!!
     !
     ! DS - Impose phase
     IF (lphase) THEN
@@ -458,6 +715,11 @@
     !
     cuf = cz
     eig = w
+!!!!!!
+    IF(PRESENT(is_mirror)) THEN
+      IF(is_mirror) cuf = CONJG(cz)
+    END IF
+!!!!!!
     !
     CALL stop_clock('DynW2B')
     !
@@ -466,18 +728,27 @@
     !--------------------------------------------------------------------------
     !
     !--------------------------------------------------------------------------
-    SUBROUTINE dynifc2blochf(nmodes, rws, nrws, xxq, cuf, eig)
+!!!!!
+   !SUBROUTINE dynifc2blochf(nmodes, rws, nrws, xxq, cuf, eig)
+    SUBROUTINE dynifc2blochf(nmodes, rws, nrws, xxq, cuf, eig, is_mirror)
+!!!!!
     !--------------------------------------------------------------------------
     !!
     !!  From the IFCs in the format of q2r, find the corresponding
     !!  dynamical matrix for a given q point (as in matdyn.x) on the fine grid
-    !!
+    !!  2021: CL : Lifting of degeneracies using random perturbation matrix
+    !!             with prime number matrix control tag : lphase
+    !!             Rotate the the largest element in eigenvector to real axis. (lrot)
     !
     USE kinds,     ONLY : DP
     USE cell_base, ONLY : at, bg
     USE ions_base, ONLY : amass, tau, nat, ityp
     USE elph2,     ONLY : ifc, epsi, zstar, wscache, qrpl
     USE epwcom,    ONLY : lpolar, nqc1, nqc2, nqc3, lphase
+!!!!!!
+    USE epwcom,    ONLY : debug_plrn, lrot
+    USE low_lvl,   ONLY : degen_sort
+!!!!!!
     USE io_global, ONLY : stdout
     USE rigid_epw, ONLY : rgd_blk
     USE low_lvl,       ONLY : utility_zdotu
@@ -497,6 +768,10 @@
     !! interpolated phonon eigenvalues for this qpoint
     COMPLEX(KIND = DP), INTENT(out) :: cuf(nmodes, nmodes)
     !! Rotation matrix, fine mesh
+!!!!!!
+    LOGICAL, INTENT(IN), OPTIONAL :: is_mirror
+    !! .true. if q-point is a time-reversal point
+!!!!!!
     !
     ! Local variables
     LOGICAL, SAVE :: first = .TRUE.
@@ -557,6 +832,14 @@
     !! Dynamical matrix in Bloch basis, fine mesh
     COMPLEX(KIND = DP) :: dyn(3, 3, nat, nat)
     !! Dynamical matrix
+!!!!!
+    REAL(KIND = DP) :: norm_vec(nmodes)
+    !! Vector for one eigenmode
+    INTEGER :: imode_max(1)
+    !! index of the max element
+    COMPLEX(KIND = DP) :: fac_max(1)
+    !! value of the max element
+!!!!!
     !
     CALL start_clock('DynW2B')
     !
@@ -686,6 +969,19 @@
     CALL zhpevx('V', 'A', 'U', nmodes, champ , zero, zero, &
                 0, 0, -one, neig, w, cz, nmodes, cwork, rwork, iwork, ifail, info)
     !
+!!!!!
+    ! Find the largest element and set it to pure real
+    IF(lrot) THEN
+       DO imode = 1, nmodes
+         DO jmode = 1, nmodes
+            norm_vec(jmode) = ABS(cz(jmode, imode))
+         END DO
+         imode_max(:) = MAXLOC(norm_vec(1:nmodes))
+         fac_max(1) = cz(imode_max(1), imode)/norm_vec(imode_max(1))
+         cz(1:nmodes, imode) = cz(1:nmodes, imode) * CONJG(fac_max(1))
+       END DO
+    END IF
+!!!!!
     ! clean noise
     DO jmode = 1,nmodes
       DO imode = 1,nmodes
diff --git a/EPW/src/wfc_elec.f90 b/EPW/src/wfc_elec.f90
deleted file mode 100644
index adb6d5e74..000000000
--- a/EPW/src/wfc_elec.f90
+++ /dev/null
@@ -1,136 +0,0 @@
-!
-! Copyright (C) 2010-2016 Samuel Ponce', Roxana Margine, Carla Verdi, Feliciano Giustino
-! Copyright (C) 2007-2009 Jesse Noffsinger, Brad Malone, Feliciano Giustino
-!
-! This file is distributed under the terms of the GNU General Public
-! License. See the file `LICENSE' in the root directory of the
-! present distribution, or http://www.gnu.org/copyleft.gpl.txt .
-!
-!-----------------------------------------------------------------------
-MODULE polaron
-    USE kinds,  ONLY : dp
-    IMPLICIT NONE
-    ! Data block, try to keep it to minimal
-    COMPLEX(KIND = DP), ALLOCATABLE :: epfall(:, :, :, :, :)
-    !! el-ph element for all local k and all q
-    !! epfall need to be filled in ephwann_shuffle
-    COMPLEX(KIND = DP), ALLOCATABLE :: ufall(:, :, :)
-    !! el-ph element for all local k and all q
-    !! epfall need to be filled in ephwann_shuffle
-    COMPLEX(KIND = DP), ALLOCATABLE :: Hamil(:, :)
-    !! Hamil need to be passed to h_psi because the parameter space is fixed
-    !! to meet the requirement of Davidson diagonalization. Ugly but workable.
-    COMPLEX(KIND = DP), ALLOCATABLE :: eigVec(:, :)
-    !! polaron eigenvector
-    REAL(KIND = DP),    ALLOCATABLE :: etf_all(:, :)
-    !! Gather all the eigenvalues
-    INTEGER,            ALLOCATABLE :: ikq_all(:, :), kpg_map(:)
-    !
-    PUBLIC  :: wfc_elec, interp_plrn_wf, interp_plrn_bq, plot_plrn_wf
-CONTAINS
-    !
-    !-----------------------------------------------------------------------
-    SUBROUTINE interp_plrn_wf(nrr_k, ndegen_k, irvec_r, dims)
-        USE io_global,     ONLY : stdout, ionode
-
-        IMPLICIT NONE
-
-        INTEGER, INTENT (IN) :: nrr_k, dims, ndegen_k(:,:,:) ! ! Added for polaron calculations by Chao Lian.
-        REAL(DP), INTENT (IN) :: irvec_r(3, nrr_k)
-        COMPLEX(DP), ALLOCATABLE :: eigvec_wan(:, :)
-        INTEGER :: nkf1_p, nkf2_p, nkf3_p, nktotf_p, nbndsub_p
-    END SUBROUTINE
-    !
-    !-----------------------------------------------------------------------
-    SUBROUTINE interp_plrn_bq(nrr_q, ndegen_q, irvec_q)
-        USE epwcom,        ONLY : nkf1, nkf2, nkf3, nbndsub
-        USE elph2, only : xqf, wf, nqtotf
-        USE modes,         ONLY : nmodes
-        USE constants_epw, only : eps8, czero, one, two, twopi, ci
-        USE ions_base,     ONLY : nat, amass, ityp, tau
-        USE wan2bloch, only : dynwan2bloch
-
-        IMPLICIT NONE
-        INTEGER, INTENT (IN) :: nrr_q, ndegen_q(:,:,:) ! ! Added for polaron calculations by Chao Lian.
-        INTEGER, INTENT (IN) :: irvec_q(3, nrr_q)
-
-        INTEGER :: dtau_file
-        INTEGER :: nkf1_p, nkf2_p, nkf3_p, nktotf_p, nat_p
-
-        INTEGER :: iq, inu, ierr, imu, na, iatm, idir
-        INTEGER :: icount, ix, iy, iz, bmat_file
-        COMPLEX(DP) :: ctemp, shift(3)
-
-        COMPLEX(DP), ALLOCATABLE :: uf(:, :), Bmat(:,:)
-        COMPLEX(DP),  ALLOCATABLE :: dtau(:, :)
-        REAL(DP),  ALLOCATABLE :: w2(:)
-        REAL(KIND=dp) :: xxq(3)
-        COMPLEX(KIND=dp) :: expTable(3)
-
-    END SUBROUTINE
-
-    SUBROUTINE wfc_elec (nrr_k, ndegen_k, irvec_r, dims)
-        !
-        ! Self consistency calculation of polaron wavefunction.
-        ! Rewritten by Chao Lian based on the implementation by Danny Sio.
-        !
-        USE modes,         ONLY : nmodes
-        USE constants_epw, ONLY : ryd2mev, one, ryd2ev, two, zero
-        USE constants_epw, ONLY : czero, cone, pi, ci, twopi, eps6, eps8, eps5
-        USE epwcom,        ONLY : num_cbands, polaron_type, sigma_plrn, full_diagon_plrn
-        USE epwcom,        ONLY : r01, r02, r03, nPlrn, conv_thr_polaron, cb_shift
-        USE epwcom,        ONLY : mixing_Plrn, init_plrn_wf, niterPlrn
-        USE epwcom,        ONLY : nkf1, nkf2, nkf3, nbndsub
-        USE io_global,     ONLY : stdout, ionode, meta_ionode_id
-        USE elph2,         ONLY : etf, ibndmin, ibndmax, nbndfst
-        USE elph2,         ONLY : nkqf, nkf, nqf, nqtotf, nktotf
-        USE elph2,         ONLY : xkf, xqf, wf, xkq, chw
-        USE mp_global,     ONLY : inter_pool_comm
-        USE mp_world,      ONLY : world_comm
-        USE cell_base,     ONLY : bg
-        USE mp,            ONLY : mp_sum, mp_bcast
-        USE poolgathering, ONLY : poolgather2
-        USE test_tools,    ONLY : para_write
-        USE wan2bloch,     ONLY : hamwan2bloch
-        USE ions_base,     ONLY : nat
-
-        IMPLICIT NONE
-
-        ! local variables
-        LOGICAL :: debug
-        INTEGER :: inu, iq, ik, ikk, jk, ibnd, jbnd, ikq, ik_global, iplrn, ierr
-        INTEGER :: iter, icount, ix, iy, iz, start_mode, ik_bm, idos, iatm
-
-        INTEGER, INTENT (IN) :: nrr_k, dims, ndegen_k(:,:,:) ! ! Added for polaron calculations by Chao Lian.
-        REAL(DP), INTENT (IN) :: irvec_r(3, nrr_k)
-
-        COMPLEX(DP),  ALLOCATABLE :: Bmat(:,:), Bmat_save(:,:)
-        COMPLEX(DP),  ALLOCATABLE :: eigvec_wan(:, :), dtau(:, :)
-        REAL(DP),     ALLOCATABLE :: rmat_tmp(:, :)
-
-        COMPLEX(KIND=dp) :: cufkk ( nbndsub, nbndsub ), cfac(nrr_k, dims, dims)
-        !! Rotation matrix, fine mesh, points k
-
-        REAL(dp):: estmteRt(nPlrn),  eigVal(nPlrn), esterr
-
-        REAL(KIND=dp) :: qcart(3), r0(3), xxk(3), xxq(3), prefac, norm
-        REAL(KIND=dp) :: ef
-
-        INTEGER :: band_pos, iqpg, ikpg, ikGamma, iqGamma
-        INTEGER :: nkf1_p, nkf2_p, nkf3_p, nbndsub_p, nPlrn_p, nktotf_p
-
-        REAL(DP) :: eb
-        REAL(DP) :: xkf_all(3, nktotf), xkf_all_tmp(3, nktotf*2)
-        REAL(DP) :: EPlrnTot, EPlrnElec, EPlrnPhon
-        REAL(DP) :: disK, disK_t, shift(3)
-
-        COMPLEX(DP) :: ctemp
-        REAL(DP)    :: rtemp
-        INTEGER     :: itemp, jtemp
-        INTEGER     :: dos_file, wan_func_file, bloch_func_file, bmat_file, dtau_file
-        !LOGICAL     :: SCF_run
-
-    END SUBROUTINE
-    SUBROUTINE plot_plrn_wf()
-    END SUBROUTINE
-END MODULE