From 09316f96ee213854ddd8dd2f28156e14d3ec0b0a Mon Sep 17 00:00:00 2001
From: paoloumari <pumari00@r000u06l01.marconi.cineca.it>
Date: Sat, 30 Jun 2018 10:34:40 +0200
Subject: [PATCH] New simple_ip code P.Umari G.Prandini N.Marzari

---
 GWW/simple_ip/Makefile              |  54 +++
 GWW/simple_ip/diagonalization.f90   | 195 +++++++++
 GWW/simple_ip/dielectric.f90        | 582 ++++++++++++++++++++++++
 GWW/simple_ip/input_simple_ip.f90   |  79 ++++
 GWW/simple_ip/interpolation.f90     | 253 +++++++++++
 GWW/simple_ip/make.depend           |  37 ++
 GWW/simple_ip/simple_ip.f90         |  56 +++
 GWW/simple_ip/simple_ip_objects.f90 | 656 ++++++++++++++++++++++++++++
 GWW/simple_ip/start_end.f90         |  68 +++
 GWW/simple_ip/tetra_mod1.f90        | 302 +++++++++++++
 GWW/simple_ip/tetra_mod2.f90        | 302 +++++++++++++
 11 files changed, 2584 insertions(+)
 create mode 100644 GWW/simple_ip/Makefile
 create mode 100644 GWW/simple_ip/diagonalization.f90
 create mode 100644 GWW/simple_ip/dielectric.f90
 create mode 100644 GWW/simple_ip/input_simple_ip.f90
 create mode 100644 GWW/simple_ip/interpolation.f90
 create mode 100644 GWW/simple_ip/make.depend
 create mode 100644 GWW/simple_ip/simple_ip.f90
 create mode 100644 GWW/simple_ip/simple_ip_objects.f90
 create mode 100644 GWW/simple_ip/start_end.f90
 create mode 100644 GWW/simple_ip/tetra_mod1.f90
 create mode 100644 GWW/simple_ip/tetra_mod2.f90

diff --git a/GWW/simple_ip/Makefile b/GWW/simple_ip/Makefile
new file mode 100644
index 000000000..df9b034cd
--- /dev/null
+++ b/GWW/simple_ip/Makefile
@@ -0,0 +1,54 @@
+# Makefile for simple_ip
+
+include ../../make.inc
+
+# location of include files
+IFLAGS=
+
+# location of needed modules
+MODFLAGS= $(MOD_FLAG)../../iotk/src $(MOD_FLAG)../../Modules \
+          $(MOD_FLAG)../pw4gww $(MOD_FLAG)../../PW/src  \
+          $(MOD_FLAG)../../LAXlib $(MOD_FLAG)../../UtilXlib $(MOD_FLAG).
+
+#location of needed libraries
+LIBOBJS= ../../iotk/src/libiotk.a  \
+         ../../clib/clib.a 
+
+SIMPLEIPOBJS = \
+simple_ip_objects.o \
+start_end.o \
+input_simple_ip.o \
+tetra_mod1.o \
+interpolation.o \
+diagonalization.o \
+dielectric.o  
+
+
+QEMODS = ../../Modules/libqemod.a ../../FFTXlib/libqefft.a ../../KS_Solvers/CG/libcg.a ../../KS_Solvers/Davidson/libdavid.a \
+	../../LAXlib/libqela.a ../../UtilXlib/libutil.a
+PWOBJS = ../../PW/src/libpw.a
+
+LIBMIN= 
+
+TLDEPS=bindir mods libs libiotk pw
+
+all : tldeps simple_ip.x  libsimple_ip.a
+
+simple_ip.x : simple_ip.o $(SIMPLEIPOBJS) $(LIBOBJS) $(PWOBJS) $(QEMODS)  $(LIBMIN) 
+	$(MPIF90) $(LDFLAGS) -o $@ \
+		simple_ip.o $(SIMPLEIPOBJS) $(PWOBJS) $(QEMODS) $(LIBOBJS) $(LIBMIN) $(LIBS) 
+	- ( cd ../../bin; ln -fs ../GWW/simple_ip/$@ . )
+
+libsimple_ip.a : $(SIMPLEIPOBJS)
+	$(AR) $(ARFLAGS) $@ $(SIMPLEIPOBJS)
+
+tldeps :
+	if test -n "$(TLDEPS)" ; then \
+	( cd ../.. ; $(MAKE) $(TLDEPS) || exit 1 ) ; fi
+
+clean :
+	- /bin/rm -f -v simple_ip.x  *.o *~ *.F90 *.d *.mod *.i work.pc
+
+include make.depend
+
+# DO NOT DELETE
diff --git a/GWW/simple_ip/diagonalization.f90 b/GWW/simple_ip/diagonalization.f90
new file mode 100644
index 000000000..1b03121d3
--- /dev/null
+++ b/GWW/simple_ip/diagonalization.f90
@@ -0,0 +1,195 @@
+
+SUBROUTINE diagonalization(q,sh,input,eig,ik,kptns)
+  !------------------------------------------------------------------
+  !
+  ! This routine calculates H(q)_ij in the optimal basis for a given q and then diagonalizes it.
+  ! It reconstructs V_nonloc_ij from the interpolated projectors
+  ! h_level = 0 only kinetic part
+  ! h_level = 1 kinetic part + local
+  ! h_level > 1 kinetic part + local + non-local
+  !
+  USE kinds, ONLY : DP 
+  USE constants, ONLY : pi , rytoev
+  USE simple_ip_objects
+  USE input_simple_ip
+  USE io_global, ONLY : stdout
+  !
+  IMPLICIT NONE
+  !
+  REAL(kind=DP), INTENT(in) :: q(3)  ! k-point (units 2*pi/alat)
+  INTEGER, INTENT(in)  :: ik         ! k-point in the local dense grid
+  TYPE(shirley) :: sh
+  TYPE(eigen) :: eig
+  TYPE(kpoints) :: kptns
+  TYPE(input_options_simple_ip) :: input
+  COMPLEX(kind=DP), DIMENSION(:,:), ALLOCATABLE  :: h_mat 
+  COMPLEX(kind=DP), DIMENSION(:), ALLOCATABLE :: work
+  REAL(kind=DP), DIMENSION(:), ALLOCATABLE :: rwork , energies_tmp
+  INTEGER, DIMENSION(:), ALLOCATABLE :: iwork , ifail
+  COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: b_nc , csca_nc 
+  COMPLEX(kind=DP), DIMENSION(:,:), ALLOCATABLE :: b_c  ,  csca_mat , csca_mat2 , deeaux , csca_nc2 
+  REAL(kind=DP) :: k2  , abstol , vl , vu 
+  INTEGER :: i, ndim, nt, na, j, ih, jh, ikb, jkb, lwork, info, low_eig, high_eig ,ntot_e , num_bands
+  !
+  !call start_clock('diagonalization')
+  !
+  if(associated(eig%energy)) deallocate(eig%energy)
+  allocate(eig%energy(sh%num_bands))
+  if(associated(eig%wave_func)) deallocate(eig%wave_func)
+  allocate(eig%wave_func(sh%ntot_e,sh%num_bands))
+  !
+  allocate(h_mat(sh%ntot_e,sh%ntot_e),energies_tmp(sh%ntot_e))
+  h_mat(1:sh%ntot_e,1:sh%ntot_e) = 0.d0
+  energies_tmp(1:sh%ntot_e) = 0.d0
+  !
+  ! Construction of the matrix H(q) = (1/2)*[q**2 + q*K1 + K0] + Vloc + Vnonloc(q) in the Shirley basis
+  ! The factor 1/2 in front of the kinetic term is not needed in the QE units
+  !
+  ! Diagonal part: q^2  
+  k2 = q(1)*q(1) + q(2)*q(2) + q(3)*q(3)
+  k2 = k2 * (2.0*pi/sh%alat)**2
+  do i=1,sh%ntot_e
+   h_mat(i,i) = k2  
+  enddo
+  !
+  ! Kinetic term: q*K1
+  do i=1,3
+   h_mat(1:sh%ntot_e,1:sh%ntot_e) = h_mat(1:sh%ntot_e,1:sh%ntot_e) + &
+         & q(i)*sh%h1(1:sh%ntot_e,1:sh%ntot_e,i) * (2.0*pi/sh%alat)  !*0.5
+  enddo  
+  !
+  ! Kinetic term: K0
+  h_mat(1:sh%ntot_e,1:sh%ntot_e) = h_mat(1:sh%ntot_e,1:sh%ntot_e) + sh%h0(1:sh%ntot_e,1:sh%ntot_e)
+  !
+  ! Vloc
+  if (input%h_level>0) then
+     h_mat(1:sh%ntot_e,1:sh%ntot_e) = h_mat(1:sh%ntot_e,1:sh%ntot_e) + sh%Vloc(1:sh%ntot_e,1:sh%ntot_e)
+  endif
+  !
+  ! Vnonloc
+  !call start_clock('diago_vnloc')
+  if (input%h_level>1) then
+   !
+   if (sh%noncolin) then
+     ndim = sh%nkb * sh%npol * sh%ntot_e
+     allocate(b_nc(sh%nkb,sh%npol,sh%ntot_e))
+     if(input%nonlocal_interpolation) then
+       call trilinear_parallel_nc(kptns,ik,sh,b_nc)
+     else
+       b_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb, 1:sh%npol, 1:sh%ntot_e,ik)
+     endif
+   else
+     ndim = sh%nkb * sh%ntot_e
+     allocate(b_c(sh%nkb,sh%ntot_e))
+     if(input%nonlocal_interpolation) then
+       call trilinear_parallelc(kptns,ik,sh,b_c)
+     else
+       b_c(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb, 1:sh%ntot_e,ik)
+     endif
+   endif
+   !
+   ! Build Vnonloc
+   if (sh%noncolin) then
+     !
+     allocate(csca_nc(sh%nkb,sh%npol,sh%ntot_e),csca_nc2(sh%ntot_e,sh%ntot_e))
+     csca_nc(:,:,:) = 0.d0
+     do nt=1,sh%ntyp
+      if (sh%nh(nt)==0) CYCLE
+      do na=1,sh%nat
+       if ( sh%ityp(na) == nt ) then
+           do j=1,sh%ntot_e
+            do jh=1,sh%nh(nt)
+             jkb = sh%indv_ijkb0(na)+jh
+             do ih=1,sh%nh(nt)
+               ikb = sh%indv_ijkb0(na)+ih  
+               ! 
+               csca_nc(ikb,1,j) = csca_nc(ikb,1,j) +    & 
+                              sh%deeq_nc(ih,jh,na,1)*b_nc(jkb,1,j)+ & 
+                              sh%deeq_nc(ih,jh,na,2)*b_nc(jkb,2,j) 
+               csca_nc(ikb,2,j) = csca_nc(ikb,2,j)  +   & 
+                              sh%deeq_nc(ih,jh,na,3)*b_nc(jkb,1,j)+&
+                              sh%deeq_nc(ih,jh,na,4)*b_nc(jkb,2,j) 
+               !
+               enddo
+            enddo
+           enddo
+       endif    
+      enddo
+     enddo
+     !
+     call ZGEMM ('C', 'N', sh%ntot_e, sh%ntot_e, sh%npol*sh%nkb, ( 1.d0, 0.d0 ) , b_nc, &
+                   sh%npol*sh%nkb, csca_nc, sh%npol*sh%nkb, ( 0.d0, 0.d0 ) , csca_nc2, sh%ntot_e)
+     h_mat(1:sh%ntot_e,1:sh%ntot_e) = h_mat(1:sh%ntot_e,1:sh%ntot_e) + csca_nc2(1:sh%ntot_e,1:sh%ntot_e)
+     !
+     deallocate(csca_nc,csca_nc2)
+     !
+   else
+     !
+     allocate(csca_mat(sh%nkb,sh%ntot_e), csca_mat2(sh%ntot_e,sh%ntot_e))
+     !
+     do nt=1,sh%ntyp
+      if (sh%nh(nt)==0) cycle
+      allocate ( deeaux(sh%nh(nt),sh%nh(nt)) )
+      do na=1,sh%nat
+       if ( sh%ityp(na) == nt ) then
+          !
+          deeaux(1:sh%nh(nt),1:sh%nh(nt)) = sh%deeqc(1:sh%nh(nt),1:sh%nh(nt),na) 
+          call ZGEMM('N','N', sh%nh(nt), sh%ntot_e, sh%nh(nt), (1.d0,0.d0), &
+                     deeaux, sh%nh(nt), b_c(sh%indv_ijkb0(na)+1,1), sh%nkb, &
+                     (0.d0, 0.d0), csca_mat(sh%indv_ijkb0(na)+1,1), sh%nkb )
+          !
+       endif    
+      enddo
+      deallocate(deeaux)
+     enddo
+     !
+     CALL ZGEMM( 'C', 'N', sh%ntot_e, sh%ntot_e, sh%nkb, ( 1.d0, 0.d0 ) , b_c, &
+                   sh%nkb, csca_mat, sh%nkb, ( 0.d0, 0.d0 ) , csca_mat2, sh%ntot_e )
+     h_mat(1:sh%ntot_e,1:sh%ntot_e) = h_mat(1:sh%ntot_e,1:sh%ntot_e) + csca_mat2(1:sh%ntot_e,1:sh%ntot_e)
+     !
+     deallocate(csca_mat, csca_mat2)
+     !
+   endif
+   !
+   if (sh%noncolin) then
+     deallocate(b_nc)
+   else
+     deallocate(b_c)
+   endif
+   !
+  endif
+  !call stop_clock('diago_vnloc')
+  !
+  ! Hamiltonian diagonalization 
+  allocate(rwork(7*sh%ntot_e),iwork(5*sh%ntot_e),ifail(sh%ntot_e))
+  allocate(work(1))
+  lwork = -1
+  abstol = -1.d0
+  vl = 0.d0
+  vu = 0.d0
+  low_eig = sh%num_nbndv(1)-sh%num_val+1
+  high_eig = sh%num_nbndv(1)+sh%num_cond
+  num_bands = sh%num_bands
+  !
+  CALL ZHEEVX( 'V', 'I', 'U', sh%ntot_e, h_mat, sh%ntot_e, vl, vu, low_eig, high_eig, abstol, &
+             & num_bands, energies_tmp, eig%wave_func, sh%ntot_e, work, lwork, rwork, iwork, ifail, info)
+  !
+  lwork = int(work(1))
+  deallocate(work)
+  allocate(work(1:lwork))
+  !
+  !Diagonalization of H(k) using ZHEEVX (only the first sh%num_bands energies/wavefunctions are explicitly calculated)
+  !call start_clock('diago_zheevx')
+  call ZHEEVX( 'V', 'I', 'U', sh%ntot_e, h_mat, sh%ntot_e, vl, vu, low_eig, high_eig, abstol, &
+             & num_bands, energies_tmp, eig%wave_func, sh%ntot_e, work, lwork, rwork, iwork, ifail, info)
+  !call stop_clock('diago_zheevx')
+  !
+  if (info /= 0) write(stdout,*) 'ERROR in the diagonalization of the k-dependent Hamiltonian', info
+  eig%energy(1:sh%num_bands) = energies_tmp(1:sh%num_bands)
+  !
+  deallocate(h_mat, rwork, work, iwork, ifail, energies_tmp)
+  !
+  !call stop_clock('diagonalization')
+  !
+END SUBROUTINE diagonalization
+
diff --git a/GWW/simple_ip/dielectric.f90 b/GWW/simple_ip/dielectric.f90
new file mode 100644
index 000000000..b8e8ea8db
--- /dev/null
+++ b/GWW/simple_ip/dielectric.f90
@@ -0,0 +1,582 @@
+
+subroutine dielectric(sh,input,kpnts,energy)
+  !------------------------------------------------------------------------
+  !
+  ! This subroutine diagonalizes the k-dependent Hamiltonian for every k in interp_grid
+  ! and calculates the IP dielectric function
+  !
+  USE simple_ip_objects
+  USE input_simple_ip
+  USE tetra_ip, ONLY : tetrahedra1, weights_delta1
+  USE kinds, ONLY : DP
+  USE io_global, ONLY : stdout, ionode
+  USE constants, ONLY : rytoev, pi
+  USE mp_world,  ONLY : world_comm
+  USE mp,        ONLY : mp_sum
+  !
+  IMPLICIT NONE
+  !
+  TYPE(shirley) :: sh
+  TYPE(input_options_simple_ip) :: input
+  TYPE(energies) :: energy
+  TYPE(kpoints) :: kpnts
+  TYPE(eigen) :: eig
+  REAL(kind=DP) :: q(3)
+  INTEGER :: ik, idir, ii , counting , iun
+  COMPLEX(kind=DP), DIMENSION(:,:), ALLOCATABLE :: tmp , tmp2
+  COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: commut_interp
+  REAL(kind=DP), EXTERNAL :: efermig , efermit
+  REAL(kind=DP), EXTERNAL ::  w0gauss
+  REAL(kind=DP), EXTERNAL ::  wgauss
+  INTEGER ::  is , iw , iband1, iband2 , num_energy_dos
+  REAL(kind=DP) :: nelec, ef, arg, tpiovera, alpha, diff_occ, delta_e, w
+  REAL(kind=DP), DIMENSION(6) :: plasma_freq
+  REAL(kind=DP), DIMENSION(:), ALLOCATABLE :: wk
+  INTEGER, DIMENSION(:), ALLOCATABLE :: isk
+  REAL(kind=DP), DIMENSION(:,:), ALLOCATABLE :: energy_sum
+  REAL(kind=DP), DIMENSION(:,:), ALLOCATABLE :: focc, focc_tmp, eps_im, eps_re, eps_tot_re, eps_tot_im, eels
+  REAL(kind=DP), DIMENSION(:), ALLOCATABLE :: wgrid, wgrid_dos, dos , jdos , eps_avg_re, eps_avg_im 
+  REAL(kind=DP), DIMENSION(:), ALLOCATABLE :: refractive_index , extinct_coeff , reflectivity
+  REAL(kind=DP) :: lorentzian, matrix_element2 , norm_epsilon
+  REAL(kind=DP) :: delta_e_thr = 1.0E-6   ! in eV
+  COMPLEX(kind=DP), DIMENSION(:,:), ALLOCATABLE  :: matrix_element
+  INTEGER :: num_tetra
+  INTEGER, allocatable :: tetra(:,:)
+  REAL(kind=DP), allocatable :: weights(:) 
+  CHARACTER(len=256), DIMENSION(6) :: headline
+  CHARACTER(256) :: str
+  INTEGER, EXTERNAL :: find_free_unit
+  !
+  call start_clock('dielectric')
+  !
+  call initialize_eigen(eig)
+  !
+  allocate(tmp(sh%ntot_e,sh%num_bands))
+  allocate(tmp2(sh%num_bands,sh%num_bands))
+  if (input%nonlocal_commutator .and. sh%nonlocal_commutator) then
+    allocate(commut_interp(sh%ntot_e,sh%ntot_e,3))
+  endif
+  allocate(focc(sh%num_bands,kpnts%nk),focc_tmp(sh%num_bands,energy%nk_loc))
+  allocate(wgrid(input%nw),jdos(input%nw),eps_im(input%nw,3),eps_re(input%nw,3),eels(input%nw,3))
+  allocate(eps_tot_im(input%nw,3),eps_tot_re(input%nw,3))
+  allocate(eps_avg_im(input%nw),eps_avg_re(input%nw),refractive_index(input%nw),extinct_coeff(input%nw),reflectivity(input%nw))
+  allocate(matrix_element(sh%num_bands,sh%num_bands))
+  !
+  !!!!!!! Interpolation of the bands
+  counting = 0  !DEBUG
+  tpiovera = 2.d0*pi/sh%alat
+  write(stdout,*) ' '
+  write(stdout,*) 'Computing the band structure...'
+  write(stdout,*) ' '
+  !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ik,q) FIRSTPRIVATE(eig) 
+  do ik=1,energy%nk_loc
+     !
+     q(1:3) = kpnts%qk(1:3,ik + energy%ik_first - 1)
+     call diagonalization(q,sh,input,eig,ik,kpnts)
+     energy%energy(1:sh%num_bands,ik) = eig%energy(1:sh%num_bands) ! Save bands in energy%energy
+     !
+     counting = counting + 1  !DEBUG
+     write(stdout,*)  '*****************************************************************'  !DEBUG
+     write(stdout,*) 'k-point:', counting
+     write(stdout,*) 'k-point coordinates:', q  !DEBUG
+     write(stdout,*) 'Interpolated bands:' , energy%energy(1:sh%num_bands,ik)*rytoev !DEBUG
+     write(stdout,*) ' '
+  enddo
+  !$OMP END PARALLEL DO
+  !!!!!!! End interpolation of the bands
+  !
+  allocate(energy_sum(sh%num_bands,kpnts%nk))
+  energy_sum = 0.d0
+  ! Gather all the bands divided in the various processors by subgroups of k-points
+  do ik =1, energy%nk_loc
+    energy_sum(1:sh%num_bands,ik+energy%ik_first-1) = energy%energy(1:sh%num_bands, ik)
+  enddo
+  call mp_sum(energy_sum,world_comm)
+  !
+  !!!!!!! Fermi energy calculation
+  if (input%fermi_energy == -1) then
+    write(stdout,*) ' '
+    write(stdout,*) 'Computing the Fermi energy...'
+    nelec = sh%nelec
+    allocate(wk(1:kpnts%nk),isk(1:kpnts%nk))  ! kpnts%nk = total number of k-points in interp_grid
+    wk(1:kpnts%nk) = 2.d0/(sh%npol*dble(kpnts%nk))   ! uniform weights
+    isk(1:kpnts%nk) = 1
+    is = 0
+    !
+    if (input%tetrahedron_method) then
+        ! Tetrahedra
+        num_tetra = 6 * kpnts%nk
+        allocate(tetra(4,num_tetra))
+        allocate(weights(kpnts%nk))
+        call tetrahedra1(kpnts%nkgrid(1), kpnts%nkgrid(2), kpnts%nkgrid(3), num_tetra, tetra)
+        ef = efermit (energy_sum, sh%num_bands, kpnts%nk, nelec, sh%nspin, num_tetra, tetra, is, isk)
+        write(stdout,'(a,f10.5,a)') ' Fermi energy (tetrahedra) = ', ef*rytoev , ' eV'
+    else
+        ! Broadening methods
+        ef = efermig (energy_sum, sh%num_bands, kpnts%nk, nelec, wk, input%fermi_degauss, input%fermi_ngauss, is, isk)
+        write(stdout,'(a,f10.5,a)') ' Fermi energy (broadening method) = ', ef*rytoev , ' eV'
+    endif
+    deallocate(wk,isk)
+  !
+  else
+  !
+    write(stdout,*) 'Reading the Fermi energy from input...'
+    ef = input%fermi_energy
+    write(stdout,*) 'Fermi energy = ', ef*rytoev
+  endif
+  !!!!!!! End Fermi energy calculation
+  !
+  ! Occupation of the states (with Fermi-Dirac distribution)
+  focc = 0.0d0
+  do ik=1,energy%nk_loc
+    do ii=1,sh%num_bands
+      arg = (ef - energy%energy(ii,ik))/input%elec_temp
+      focc_tmp(ii,ik) = wgauss(arg,-99)
+    enddo
+  enddo
+  do ik=1,energy%nk_loc
+    focc(1:sh%num_bands,ik+energy%ik_first-1) = focc_tmp(1:sh%num_bands,ik)
+  enddo
+  call mp_sum(focc,world_comm) 
+  !
+  !!! Calculate DOS  (units: states/eV)
+  headline(6) = "       Energy grid [eV]               DOS"
+  num_energy_dos = int( ( maxval(energy_sum(sh%num_bands,:)) - minval(energy_sum(1,:)) )&
+                    & /input%delta_energy_dos + 0.5) + 1
+  allocate(wgrid_dos(1:num_energy_dos),dos(1:num_energy_dos))
+  wgrid_dos = 0.0d0
+  do iw = 1, num_energy_dos
+      wgrid_dos(iw) = minval(energy_sum(1,:)) + (iw-1) * input%delta_energy_dos ! energy grid for DOS
+  enddo
+  ! 
+  dos = 0.d0
+  if (input%tetrahedron_method) then
+    ! Calculate DOS (Tetrahedron method)
+    write(stdout,*) 'Computing the DOS (Tetrahedron method)...'
+    do iw = 1, num_energy_dos
+      w = wgrid_dos(iw)
+      do ii=1, sh%num_bands
+        call weights_delta1(w, energy_sum(ii,:), num_tetra, tetra, kpnts%nk, weights)
+        do ik=1,kpnts%nk
+          dos(iw) = dos(iw) + weights(ik)
+        enddo
+      enddo
+    enddo
+    dos(1:num_energy_dos) = 2.d0 * dos(1:num_energy_dos) / sh%npol
+    if (ionode) then
+      write(stdout,"(/,1x, 'Writing output on file...' )")
+      call writetofile(input%prefix,"dos_tetrahedra",headline(6),num_energy_dos,wgrid_dos,1,dos/rytoev)
+    endif
+  else
+    ! Calculate DOS (Broadening methods)
+    write(stdout,*) ' '
+    write(stdout,*) 'Computing the DOS (Broadening method)...'
+    !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ik,ii,iw,w,arg) &
+    !$OMP FIRSTPRIVATE(eig) REDUCTION(+:dos)
+    do ik=1,energy%nk_loc
+        do ii=1, sh%num_bands
+           do iw = 1, num_energy_dos
+             w = wgrid_dos(iw)
+             !! Approximate Dirac-delta with the derivative of the Fermi-Dirac function
+             arg = (energy%energy(ii, ik) - w)/input%inter_broadening
+             dos(iw) = dos(iw) + w0gauss(arg,input%drude_ngauss)/input%inter_broadening
+           enddo
+         enddo
+    enddo
+    !$OMP END PARALLEL DO
+    call mp_sum(dos,world_comm)
+    dos(1:num_energy_dos) = 2.d0*dos(1:num_energy_dos) / (sh%npol*dble(kpnts%nk))
+    if (ionode) then
+        write(stdout,"(/,1x, 'Writing output on file...' )")
+        call writetofile(input%prefix,"dos_broadening",headline(6),num_energy_dos,wgrid_dos,1,dos/rytoev)
+    endif
+  endif
+  deallocate(wgrid_dos,dos,energy_sum)
+  !!! End calculation of DOS
+  !
+  ! Calculate energy grid for dielectric function (in Ry)
+  alpha = (input%wmax - input%wmin) / REAL(input%nw-1, KIND=DP)
+  wgrid = 0.d0
+  do iw = 1, input%nw
+      wgrid(iw) = input%wmin + (iw-1) * alpha
+  enddo
+  !
+  ! Calculate dielectric function
+  jdos = 0.d0
+  plasma_freq = 0.d0
+  eps_im = 0.d0
+  eps_re = 0.d0
+  counting = 0
+  write(stdout,*) ' '
+  write(stdout,*) 'Computing the IP optical properties...'
+  !$OMP PARALLEL DO DEFAULT(SHARED) &
+  !$OMP PRIVATE(ik,q,idir,tmp,tmp2,ii,matrix_element,iband1,iband2,diff_occ,delta_e,matrix_element2,iw,w,arg) &
+  !$OMP FIRSTPRIVATE(eig,commut_interp) REDUCTION(+:eps_im, eps_re, jdos, plasma_freq)
+  do ik=1,energy%nk_loc
+     !
+     counting = counting + 1
+     write(stdout,*) 'k-point:', counting
+     q(1:3) = kpnts%qk(1:3,ik + energy%ik_first - 1)
+     call diagonalization(q,sh,input,eig,ik,kpnts)
+     energy%energy(1:sh%num_bands,ik) = eig%energy(1:sh%num_bands) ! Save bands in energy%energy
+     !
+     !call start_clock('optic_elements')
+     do idir=1,3
+        !
+        ! Calculate: 1/2 \sum_ij d_i* d_j K1_ij (local matrix: <f_nk|p|f_n'k> for every n,n')
+        call ZGEMM('N','N',sh%ntot_e,sh%num_bands,sh%ntot_e,(1.d0,0.d0), &
+        & sh%h1(1,1,idir),sh%ntot_e,eig%wave_func,sh%ntot_e,(0.d0,0.d0),tmp,sh%ntot_e)
+        call ZGEMM('C','N',sh%num_bands,sh%num_bands,sh%ntot_e,(1.d0,0.d0), eig%wave_func,sh%ntot_e,&
+        & tmp,sh%ntot_e,(0.d0,0.d0),tmp2,sh%num_bands)
+        do ii=1,sh%num_bands
+            energy%energy_der(idir,ii,ik) = dble(tmp2(ii,ii))/2.d0 ! We need to divide by 2 (from the definition of K1_ij)
+        enddo
+        !
+        ! Local part: (k + G) --> -i[r,H(k)] = (hbar/m)*(p + hbar*k)
+        ! (The factor 2 comes from the QE units: m=1/2, hbar=1)
+        ! Save <nk|p|nk> in the variable energy_der
+        energy%energy_der(idir,1:sh%num_bands,ik) = 2.0*( tpiovera*q(idir) + energy%energy_der(idir,1:sh%num_bands,ik) )
+        !
+        matrix_element = 0.d0
+        if (.not. input%nonlocal_commutator .or. .not. sh%nonlocal_commutator) then
+           ! Dielectric function calculation (local part: <nk|p|n'k>)
+           do iband1=1, sh%num_bands
+            do iband2=1, sh%num_bands
+             !
+             if (iband1 == iband2) cycle
+             !
+             delta_e = energy%energy(iband2,ik) - energy%energy(iband1,ik)  ! Note: we define dE = E_i - E_j  while df = f_j - f_i
+             if (abs(delta_e) <= delta_e_thr/rytoev ) cycle    ! We do not consider transitions with very small dE
+             diff_occ = focc(iband1,ik + energy%ik_first - 1) - focc(iband2,ik + energy%ik_first - 1)
+             !
+             matrix_element(iband1,iband2) = tmp2(iband1,iband2)
+             !matrix_element2 = |<nk|p|n'k>|^2 / (E_nk - E_n'k)^2
+             matrix_element2 = dble( matrix_element(iband1,iband2) * conjg(matrix_element(iband1,iband2)) ) / delta_e**2
+             do iw = 1, input%nw
+                   w = wgrid(iw)
+                   arg = (delta_e - w)/input%inter_broadening
+                   eps_re(iw,idir) = eps_re(iw,idir) + diff_occ*matrix_element2* ( &
+                                    & (w - delta_e)/( (w-delta_e)**2 + input%inter_broadening**2 ) )
+                   eps_im(iw,idir) = eps_im(iw,idir) + diff_occ*matrix_element2* &
+                                    & lorentzian(arg*input%inter_broadening, input%inter_broadening)
+                   ! NOTE: with the formula above both resonant and anti-resonant terms are included
+                   !
+                   if (idir == 1) then
+                     ! JDOS calculation (equal to eps_im with matrix_element=1)
+                     jdos(iw) = jdos(iw) + diff_occ*w0gauss(arg,input%drude_ngauss)/input%inter_broadening
+                     !jdos(iw) = jdos(iw) + diff_occ * lorentzian(arg*input%inter_broadening, input%inter_broadening) / delta_e**2
+                   endif
+                   !
+             enddo
+             !
+            enddo
+           enddo
+           ! End dielectric function calculation (local part)
+           !
+        ! Non-local matrix: <f_nk|[r,V_nl]|f_n'k>
+        elseif (input%nonlocal_commutator .and. sh%nonlocal_commutator) then
+             !
+             matrix_element(1:sh%num_bands,1:sh%num_bands) = tmp2(1:sh%num_bands,1:sh%num_bands) ! store local contribution
+             !
+             if (input%nonlocal_interpolation) then
+               call trilinear_parallel_commut(kpnts,ik,sh,commut_interp)
+               call ZGEMM('N','N',sh%ntot_e,sh%num_bands,sh%ntot_e,(1.d0,0.d0), &
+               & commut_interp(1,1,idir),sh%ntot_e,eig%wave_func,sh%ntot_e,(0.d0,0.d0),tmp,sh%ntot_e)
+               call ZGEMM('C','N',sh%num_bands,sh%num_bands,sh%ntot_e,(1.d0,0.d0), eig%wave_func,sh%ntot_e,&
+               & tmp,sh%ntot_e,(0.d0,0.d0),tmp2,sh%num_bands)
+             else
+               call ZGEMM('N','N',sh%ntot_e,sh%num_bands,sh%ntot_e,(1.d0,0.d0), &
+               & sh%commut(1,1,idir,ik),sh%ntot_e,eig%wave_func,sh%ntot_e,(0.d0,0.d0),tmp,sh%ntot_e)
+               call ZGEMM('C','N',sh%num_bands,sh%num_bands,sh%ntot_e,(1.d0,0.d0), eig%wave_func,sh%ntot_e,&
+               & tmp,sh%ntot_e,(0.d0,0.d0),tmp2,sh%num_bands)
+             endif
+             !
+             ! Add the non-local contribution: energy_der = <nk|p|nk> + <nk|[r,V_nl]|nk>
+             do ii=1,sh%num_bands
+                 energy%energy_der(idir,ii,ik) = energy%energy_der(idir,ii,ik) + dble(tmp2(ii,ii))
+             enddo
+             !
+             ! Dielectric function calculation (non-local part)
+             do iband1=1, sh%num_bands
+              do iband2=1, sh%num_bands
+                 !
+                 if (iband1 == iband2) cycle
+                 !
+                 delta_e = energy%energy(iband2,ik) - energy%energy(iband1,ik)
+                 if (abs(delta_e) <= delta_e_thr/rytoev ) cycle    ! We do not consider transitions with very small dE
+                 diff_occ = focc(iband1,ik + energy%ik_first - 1) - focc(iband2,ik + energy%ik_first - 1)
+                 !
+                 ! matrix_element =  <nk|p|n'k> + <nk|[r,V_nl]|n'k>
+                 matrix_element(iband1,iband2) = matrix_element(iband1,iband2) + tmp2(iband1,iband2)
+                 matrix_element2 = dble( matrix_element(iband1,iband2) * conjg(matrix_element(iband1,iband2)) )  / delta_e**2
+                 !
+                 do iw = 1, input%nw
+                   w = wgrid(iw)
+                   arg = (delta_e - w)/input%inter_broadening
+                   eps_re(iw,idir) = eps_re(iw,idir) + diff_occ*matrix_element2* ( &
+                                    & (w - delta_e)/( (w-delta_e)**2 + input%inter_broadening**2 ) )
+                   eps_im(iw,idir) = eps_im(iw,idir) + diff_occ*matrix_element2* &
+                                    & lorentzian(arg*input%inter_broadening, input%inter_broadening)
+                   ! NOTE: with the formula above both resonant and anti-resonant terms are included
+                   !
+                   if (idir == 1) then
+                     ! JDOS calculation (eps_im with matrix_element=1)
+                     jdos(iw) = jdos(iw) + diff_occ*w0gauss(arg,input%drude_ngauss)/input%inter_broadening
+                     !jdos(iw) = jdos(iw) + diff_occ * lorentzian(arg*input%inter_broadening, input%inter_broadening) / delta_e**2
+                   endif
+                   !
+                 enddo
+                 !
+               enddo
+             enddo
+             ! End dielectric function calculation (non-local part)
+             !
+        endif
+        !
+     enddo
+     !
+     ! Calculate the Drude plasma frequency
+     do ii=1,sh%num_bands
+            ! w0gauss = 1.0d0 / (2.0d0 + exp ( - x) + exp ( + x) ) for Fermi-Dirac smearing (ngauss=-99)
+            ! dirac_delta = -df/dE = w0gauss/degauss
+            ! 1 = xx
+            ! 2 = yy
+            ! 3 = zz
+            ! 4 = xy
+            ! 5 = xz
+            ! 6 = yz
+            arg = (ef - energy%energy(ii,ik))/input%drude_degauss
+            plasma_freq(1) = plasma_freq(1) + energy%energy_der(1,ii,ik)**2*w0gauss(arg, &
+            & input%drude_ngauss)/input%drude_degauss
+            plasma_freq(2) = plasma_freq(2) + energy%energy_der(2,ii,ik)**2*w0gauss(arg, &
+            & input%drude_ngauss)/input%drude_degauss
+            plasma_freq(3) = plasma_freq(3) + energy%energy_der(3,ii,ik)**2*w0gauss(arg, &
+            & input%drude_ngauss)/input%drude_degauss
+            plasma_freq(4) = plasma_freq(4) + energy%energy_der(1,ii,ik)*energy%energy_der(2,ii,&
+            & ik)*w0gauss(arg,input%drude_ngauss)/input%drude_degauss
+            plasma_freq(5) = plasma_freq(5) + energy%energy_der(1,ii,ik)*energy%energy_der(3,ii,&
+            & ik)*w0gauss(arg,input%drude_ngauss)/input%drude_degauss
+            plasma_freq(6) = plasma_freq(6) + energy%energy_der(2,ii,ik)*energy%energy_der(3,ii,&
+            & ik)*w0gauss(arg,input%drude_ngauss)/input%drude_degauss
+     enddo
+     ! End calculation of Drude plasma frequency
+     !call stop_clock('optic_elements')
+     !
+  enddo
+  !$OMP END PARALLEL DO
+  !
+  call mp_sum(plasma_freq,world_comm)
+  call mp_sum(jdos,world_comm)
+  call mp_sum(eps_im,world_comm)
+  call mp_sum(eps_re,world_comm)
+  !
+  ! Drude plasma frequency in eV
+  plasma_freq(1:6) = sqrt( 16.d0 * pi * rytoev**2 * abs(plasma_freq(1:6)) / ( sh%npol * dble(kpnts%nk) * sh%omega ) )
+  !
+  write(stdout,*) ' '
+  write(stdout,'(a,f10.5,a)') ' Drude plasma frequency (xx) = ', plasma_freq(1) , ' eV'
+  write(stdout,'(a,f10.5,a)') ' Drude plasma frequency (yy) = ', plasma_freq(2) , ' eV'
+  write(stdout,'(a,f10.5,a)') ' Drude plasma frequency (zz) = ', plasma_freq(3) , ' eV'
+  write(stdout,'(a,f10.5,a)') ' Drude plasma frequency (xy) = ', plasma_freq(4) , ' eV'
+  write(stdout,'(a,f10.5,a)') ' Drude plasma frequency (xz) = ', plasma_freq(5) , ' eV'
+  write(stdout,'(a,f10.5,a)') ' Drude plasma frequency (yz) = ', plasma_freq(6) , ' eV'
+  !
+  ! JDOS/w^2
+  ! If the matrix elements are ~ 1 --> eps_2(w) ~ JDOS(w)/w^2
+  jdos(1:input%nw) = 16.d0 * pi**2 * jdos(1:input%nw) / (sh%npol * dble(kpnts%nk) * sh%omega ) / rytoev
+  jdos(1:input%nw) = jdos(1:input%nw) / wgrid(1:input%nw)**2
+  !
+  ! Dielectric function (interband)
+  eps_im(1:input%nw,1:3) = 16.d0 * pi**2 * eps_im(1:input%nw,1:3) / (sh%npol * dble(kpnts%nk) * sh%omega )
+  eps_re(1:input%nw,1:3) = 1.d0 - 16.d0 * pi * eps_re(1:input%nw,1:3) / (sh%npol * dble(kpnts%nk) * sh%omega )
+  !
+  ! Total dielectric function (interband + Drude)
+  eps_tot_im = 0.d0
+  eps_tot_re = 0.d0
+  do idir=1,3
+    do iw = 1, input%nw
+      w = wgrid(iw)
+      eps_tot_im(iw,idir) = eps_im(iw,idir) + (plasma_freq(idir)**2/rytoev**2) &
+                        & * input%intra_broadening / (w*(w**2 + input%intra_broadening**2))
+      eps_tot_re(iw,idir) = eps_re(iw,idir) - (plasma_freq(idir)**2/rytoev**2) &
+                        & / (w**2 + input%intra_broadening**2)
+    enddo
+  enddo
+  !
+  ! EELS
+  eels = 0.d0
+  do idir=1,3
+    do iw = 1, input%nw
+      eels(iw,idir) = eps_tot_im(iw,idir) / (eps_tot_im(iw,idir)**2 + eps_tot_re(iw,idir)**2)
+    enddo
+  enddo
+  !
+  ! Average total dielectric function (average over x, y, z)
+  eps_avg_im = 0.d0
+  eps_avg_re = 0.d0
+  do iw = 1, input%nw
+      eps_avg_im(iw) = ( eps_tot_im(iw,1) + eps_tot_im(iw,2) + eps_tot_im(iw,3)  ) / 3.d0
+      eps_avg_re(iw) = ( eps_tot_re(iw,1) + eps_tot_re(iw,2) + eps_tot_re(iw,3)  ) / 3.d0
+  enddo
+  !
+  ! Refractive index (from the average dielectric function)
+  refractive_index = 0.d0
+  extinct_coeff = 0.d0
+  do iw = 1, input%nw
+      norm_epsilon = sqrt(eps_avg_re(iw)**2 + eps_avg_im(iw)**2)
+      refractive_index(iw) = sqrt( ( eps_avg_re(iw) + norm_epsilon  ) / 2.d0 )
+      extinct_coeff(iw) = sqrt( ( -eps_avg_re(iw) + norm_epsilon  ) / 2.d0 )
+  enddo
+  !
+  ! Reflectivity
+  reflectivity = 0.d0
+  do iw = 1, input%nw
+      reflectivity(iw) = ( (refractive_index(iw) - 1.d0)**2 + extinct_coeff(iw)**2 ) / &
+                        & ( (refractive_index(iw) + 1.d0)**2 + extinct_coeff(iw)**2 )
+  enddo
+  !
+  ! Write outputs to file
+  headline(1) = "       Energy grid [eV]               Re(eps)_x                Re(eps)_y                Re(eps)_z"
+  headline(2) = "       Energy grid [eV]               Im(eps)_x                Im(eps)_y                Im(eps)_z"
+  headline(3) = "       Energy grid [eV]               JDOS/w^2"
+  headline(4) = "       Energy grid [eV]               EELS_x                   EELS_y                   EELS_z"
+  headline(5) = "       Energy grid [eV]               Re(eps)                  Im(eps)               refractive_index &
+         &         extinct_coeff             reflectivity"
+  !
+  if (ionode) then
+    write(stdout,"(/,1x, 'Writing output on file...' )")
+    call writetofile(input%prefix,"eps_inter_re",headline(1),input%nw,wgrid,3,eps_re)
+    call writetofile(input%prefix,"eps_inter_im",headline(2),input%nw,wgrid,3,eps_im)
+    call writetofile(input%prefix,"jdos",headline(3),input%nw,wgrid,1,jdos)
+    call writetofile(input%prefix,"eels",headline(4),input%nw,wgrid,3,eels)
+    call writetofile(input%prefix,"eps_tot_re",headline(1),input%nw,wgrid,3,eps_tot_re)
+    call writetofile(input%prefix,"eps_tot_im",headline(2),input%nw,wgrid,3,eps_tot_im)
+  endif
+  !
+  if (ionode) then
+    str = TRIM(input%prefix) // "." // TRIM("optical_constants") // ".dat"
+    iun=find_free_unit()
+    open(iun,FILE=TRIM(str))
+    !
+    write(iun,"(a)") "# "// TRIM(headline(5))
+    write(iun,"(a)") "#"
+    !
+    do iw = 1, input%nw
+       write(iun,"(10f25.6)") wgrid(iw)*rytoev, eps_avg_re(iw), eps_avg_im(iw), refractive_index(iw), &
+                              & extinct_coeff(iw), reflectivity(iw)
+    enddo
+    !
+    close(iun)
+    !
+    write(stdout,*) 'File ', TRIM(str), ' written'
+  endif
+  !
+  if (input%tetrahedron_method) then
+    deallocate(tetra,weights)
+  endif
+  deallocate(tmp,tmp2)
+  if (input%nonlocal_commutator .and. sh%nonlocal_commutator) then
+    deallocate(commut_interp)
+  endif
+  deallocate(focc,focc_tmp,wgrid,jdos,eps_im,eps_re,eps_tot_re,eps_tot_im)
+  deallocate(eels,eps_avg_im,eps_avg_re,refractive_index,extinct_coeff,reflectivity)
+  deallocate(matrix_element)
+  call deallocate_eigen(eig)
+  !
+  write(stdout,*) ' '
+  write(stdout,*) '*************************************'
+  write(stdout,*) 'Optical properties computed and saved'
+  write(stdout,*) '*************************************'
+  !
+  ! Write in output the input parameters
+  write(stdout,*) '                     '
+  write(stdout,*) '                     '
+  write(stdout,*) 'INPUT PARAMETERS:'
+  write(stdout,*)           'interpolation k-grid = ', input%interp_grid(1:3)
+  if (.not. input%nonlocal_commutator .or. .not. sh%nonlocal_commutator) then
+    write(stdout,*)           'nonlocal_commutator =    False'
+  elseif (input%nonlocal_commutator .and. sh%nonlocal_commutator) then
+    write(stdout,*)           'nonlocal_commutator =    True'
+  endif
+  if (input%tetrahedron_method) then
+    write(stdout,*)           'tetrahedron_method =    True'
+  endif
+  if (input%nonlocal_interpolation) then
+    write(stdout,*)           'nonlocal_interpolation =    True'
+  endif
+  write(stdout,'(a,f10.5)') ' fermi_degauss [eV] = ', input%fermi_degauss*rytoev
+  write(stdout,*)           'fermi_ngauss = ',  input%fermi_ngauss
+  write(stdout,'(a,f10.5)') ' drude_degauss [eV] = ', input%drude_degauss*rytoev
+  write(stdout,'(a,f10.5)') ' elec_temp [eV] = ', input%elec_temp*rytoev
+  write(stdout,'(a,f10.5)') ' inter broadening [eV] = ', input%inter_broadening*rytoev
+  write(stdout,'(a,f10.5)') ' intra broadening [eV] = ', input%intra_broadening*rytoev
+  write(stdout,'(a,f10.5)') ' s_bands [a.u.] = ', sh%s_bands
+  write(stdout,*) ' '
+  !
+  !call stop_clock('dielectric')
+  !
+end subroutine dielectric
+
+subroutine writetofile(prefix,name,headline,nw,wgrid,ncolumn,var)
+  !------------------------------------------------------------------
+  !
+  USE kinds,          ONLY : DP
+  USE constants, ONLY : rytoev
+  USE io_global, ONLY : stdout
+  !
+  IMPLICIT NONE
+  !
+  CHARACTER(LEN=*),   INTENT(IN) :: prefix
+  CHARACTER(LEN=*),   INTENT(IN) :: name
+  CHARACTER(LEN=*),   INTENT(IN) :: headline
+  INTEGER,            INTENT(IN) :: nw, ncolumn
+  REAL(kind=DP),           INTENT(IN) :: wgrid(nw)
+  REAL(kind=DP),           INTENT(IN) :: var(nw,ncolumn)
+  !
+  CHARACTER(256) :: str
+  INTEGER        :: iw, iun
+  INTEGER, EXTERNAL :: find_free_unit
+
+
+  str = TRIM(prefix) // "." // TRIM(name) // ".dat"
+  iun=find_free_unit()
+  open(iun,FILE=TRIM(str))
+  !
+  write(iun,"(a)") "# "// TRIM(headline)
+  write(iun,"(a)") "#"
+  !
+  do iw = 1, nw
+     write(iun,"(10f25.6)") wgrid(iw)*rytoev, var(iw,1:ncolumn)
+  enddo
+  !
+  close(iun)
+  !
+  WRITE(STDOUT,*) 'File ', TRIM(str), ' written'
+end subroutine writetofile
+
+
+
+
+function lorentzian (x, smr)
+  !-----------------------------------------------------------------------
+  !
+  !     this function computes the Lorentzian function at the point x with
+  !     smearing smr.
+  !
+  USE kinds, ONLY : DP
+  USE constants, ONLY : pi
+  USE io_global, ONLY : stdout
+  implicit none
+
+  REAL(kind=DP) :: lorentzian
+  REAL(kind=DP) :: x, smr
+  ! output: the value of the function   (lorentzian)
+  ! input: the argument of the function (x)
+  ! input: the smearing of the function (smr)
+
+  lorentzian = smr / (  pi*(  x**2 + smr**2  ) )
+  return
+end function lorentzian
+
+
+
diff --git a/GWW/simple_ip/input_simple_ip.f90 b/GWW/simple_ip/input_simple_ip.f90
new file mode 100644
index 000000000..89277e3f6
--- /dev/null
+++ b/GWW/simple_ip/input_simple_ip.f90
@@ -0,0 +1,79 @@
+MODULE input_simple_ip
+!this module provides input file routines 
+
+  USE kinds, ONLY: DP
+
+   TYPE input_options_simple_ip
+      CHARACTER(len=256) :: prefix = 'prefix'!prefix to designate the files same as in PW        
+      CHARACTER(len=256) :: outdir = './'!outdir to designate the files same as in PW   
+      INTEGER :: h_level = 2 ! terms included in the k-hamiltonian (for debug) 0=kinetic, 1=kinetic+local >1=kinetic+local+non-local
+      INTEGER :: interp_grid(3)    !  k-grid used for the calculation of the optical spectra (Shirley interpolation)
+      LOGICAL :: nonlocal_commutator=.true.  ! if true it includes the non-local part of [r,H] in the calculation of the optical spectra
+      LOGICAL :: nonlocal_interpolation = .false.   ! if true it interpolates the non-local part of the psp (WARNING: experimental feature)
+      LOGICAL :: tetrahedron_method = .false.    ! Use tetrahedron method to calculate Drude plasma frequency, Fermi energy and DOS
+      REAL(kind=DP) :: fermi_degauss   ! degauss (in Ry) for Fermi level calculation
+      REAL(kind=DP) :: fermi_energy = -1   ! input Fermi energy (in Ry) (if not given in input it is calculated using the Shirley interpolated bands)
+      INTEGER  :: fermi_ngauss         ! ngauss for Fermi level calculation (n=-99 --> Fermi-Dirac ; n=-1 --> cold-smearing ; n>=0 --> Methfessel-Paxton)
+      REAL(kind=DP) :: drude_degauss   ! degauss (in Ry) for Drude plasma frequency calculation
+      INTEGER  :: drude_ngauss = -99        ! ngauss for Drude plasma frequency calculation (n=-99 --> Fermi-Dirac) (WARNING: only Fermi-Dirac smearing is implemented now)
+      REAL(kind=DP) :: elec_temp = 0.0018375      ! electronic temperature (in Ry) for the occupation of the electronic states. Default is room temperature
+      REAL(kind=DP) :: wmin, wmax         ! Minimun and maximum energy of the energy interval [wmin,wmax] (in Ry)
+      INTEGER :: nw                 ! Number of points in the energy interval [wmin,wmax]
+      REAL(kind=DP) ::  inter_broadening   ! broadening used for the optical spectra (interband part)
+      REAL(kind=DP) ::  intra_broadening   ! broadening used for the optical spectra (intraband part)
+      REAL(kind=DP) ::  delta_energy_dos = 0.000735  ! Energy spacing for DOS calculation (in Ry) (default is 0.01 eV)
+
+   END TYPE input_options_simple_ip
+
+   CONTAINS
+
+     SUBROUTINE  read_input_simple_ip( simpleip_in )
+       USE io_global,            ONLY : stdout, ionode, ionode_id
+       USE mp,                   ONLY : mp_bcast
+       USE mp_world,             ONLY : world_comm
+       USE io_files,             ONLY : tmp_dir, prefix
+       
+
+       implicit none
+
+       CHARACTER(LEN=256), EXTERNAL :: trimcheck
+       TYPE(input_options_simple_ip) :: simpleip_in          !in output the input parameters
+
+       NAMELIST/inputsimpleip/simpleip_in
+
+	CHARACTER(LEN=256) :: outdir
+
+       if(ionode) then
+          read(*,NML=inputsimpleip)
+          outdir = trimcheck(simpleip_in%outdir)
+          tmp_dir = outdir
+          prefix = trim(simpleip_in%prefix)
+       endif
+
+       call mp_bcast( outdir,ionode_id, world_comm )
+       call mp_bcast( tmp_dir,ionode_id, world_comm )
+       call mp_bcast( prefix,ionode_id, world_comm )
+       call mp_bcast( simpleip_in%interp_grid, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%h_level, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%nonlocal_commutator, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%nonlocal_interpolation, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%fermi_degauss, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%fermi_energy, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%fermi_ngauss, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%drude_degauss, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%drude_ngauss, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%elec_temp, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%wmin, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%wmax, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%nw, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%inter_broadening, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%intra_broadening, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%tetrahedron_method, ionode_id, world_comm)
+       call mp_bcast( simpleip_in%delta_energy_dos, ionode_id, world_comm)
+
+       return
+
+     END SUBROUTINE read_input_simple_ip
+
+END MODULE input_simple_ip
+
diff --git a/GWW/simple_ip/interpolation.f90 b/GWW/simple_ip/interpolation.f90
new file mode 100644
index 000000000..1d942f047
--- /dev/null
+++ b/GWW/simple_ip/interpolation.f90
@@ -0,0 +1,253 @@
+! This subroutine uses the trilinear interpolation to interpolate the non-local part of the pseudopotential.
+! Algorithm from Wikipedia
+
+
+subroutine  trilinear_parallel_nc(kptns,ik,sh,c)
+  USE kinds, ONLY : DP 
+  USE constants, ONLY : pi   ! DEBUG
+  USE io_global, ONLY : stdout ! DEBUG
+  USE simple_ip_objects
+  !
+  IMPLICIT NONE
+  !
+  TYPE(kpoints) :: kptns
+  TYPE(shirley) :: sh
+  INTEGER, INTENT(in) :: ik ! dense local k-grid index
+  COMPLEX(kind=DP), INTENT(out) :: c(sh%nkb,sh%npol,sh%ntot_e)  ! Trilinear interpolated projector (non-collinear)
+  COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: c000, c100, c010, c110, c001, c101, c011, c111 
+  COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: c00, c01, c10, c11, c0, c1
+  REAL(kind=DP) ::  delta(3)
+
+  allocate(c000(sh%nkb,sh%npol,sh%ntot_e),c100(sh%nkb,sh%npol,sh%ntot_e),&
+  & c010(sh%nkb,sh%npol,sh%ntot_e),c110(sh%nkb,sh%npol,sh%ntot_e), &
+  & c001(sh%nkb,sh%npol,sh%ntot_e), c101(sh%nkb,sh%npol,sh%ntot_e), &
+  & c011(sh%nkb,sh%npol,sh%ntot_e), c111(sh%nkb,sh%npol,sh%ntot_e) )
+
+  allocate(c00(sh%nkb,sh%npol,sh%ntot_e),c01(sh%nkb,sh%npol,sh%ntot_e),&
+  & c10(sh%nkb,sh%npol,sh%ntot_e),c11(sh%nkb,sh%npol,sh%ntot_e), &
+  & c0(sh%nkb,sh%npol,sh%ntot_e), c1(sh%nkb,sh%npol,sh%ntot_e))
+  
+  c000(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(1,ik))
+  c100(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(2,ik))
+  c010(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(3,ik))
+  c110(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(4,ik))
+  c001(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(5,ik))
+  c101(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(6,ik))
+  c011(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(7,ik))
+  c111(1:sh%nkb,1:sh%npol,1:sh%ntot_e) = sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e, kptns%coord_cube(8,ik))
+     
+  delta(1:3) = kptns%pos_cube(1:3,ik)
+  
+  c00 = c000*(1.0 - delta(1)) + c100*delta(1)
+  c01 = c001*(1.0 - delta(1)) + c101*delta(1)
+  c10 = c010*(1.0 - delta(1)) + c110*delta(1)
+  c11 = c011*(1.0 - delta(1)) + c111*delta(1)
+
+  c0 = c00*(1.0 - delta(2)) + c10*delta(2)
+  c1 = c01*(1.0 - delta(2)) + c11*delta(2)
+  
+  c = c0*(1.0 - delta(3)) + c1*delta(3)
+
+  deallocate(c000, c100, c010, c110, c001, c101, c011, c111, c00, c01, c10, c11, c0, c1)
+
+end subroutine trilinear_parallel_nc
+
+subroutine  trilinear_parallelc(kptns,ik,sh,c)
+  USE kinds, ONLY : DP 
+  USE simple_ip_objects
+
+  implicit none
+
+  TYPE(kpoints) :: kptns
+  TYPE(shirley) :: sh
+  INTEGER, INTENT(in) :: ik ! dense local k-grid index
+  COMPLEX(kind=DP), INTENT(out) :: c(sh%nkb,sh%ntot_e)  ! Trilinear interpolated projector (collinear)
+  COMPLEX(kind=DP), DIMENSION(:,:), ALLOCATABLE :: c000, c100, c010, c110, c001, c101, c011, c111 
+  COMPLEX(kind=DP), DIMENSION(:,:), ALLOCATABLE :: c00, c01, c10, c11, c0, c1
+  REAL(kind=DP) ::  delta(3)
+
+  allocate(c000(sh%nkb,sh%ntot_e),c100(sh%nkb,sh%ntot_e),&
+  & c010(sh%nkb,sh%ntot_e),c110(sh%nkb,sh%ntot_e), &
+  & c001(sh%nkb,sh%ntot_e), c101(sh%nkb,sh%ntot_e), &
+  & c011(sh%nkb,sh%ntot_e), c111(sh%nkb,sh%ntot_e) )
+
+  allocate(c00(sh%nkb,sh%ntot_e),c01(sh%nkb,sh%ntot_e),&
+  & c10(sh%nkb,sh%ntot_e),c11(sh%nkb,sh%ntot_e), &
+  & c0(sh%nkb,sh%ntot_e), c1(sh%nkb,sh%ntot_e))
+ 
+  c000(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(1,ik))
+  c100(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(2,ik))
+  c010(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(3,ik))
+  c110(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(4,ik))
+  c001(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(5,ik))
+  c101(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(6,ik))
+  c011(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(7,ik))
+  c111(1:sh%nkb,1:sh%ntot_e) = sh%beckc(1:sh%nkb,1:sh%ntot_e, kptns%coord_cube(8,ik))
+     
+  delta(1:3) = kptns%pos_cube(1:3,ik)
+  
+  c00 = c000*(1.0 - delta(1)) + c100*delta(1)
+  c01 = c001*(1.0 - delta(1)) + c101*delta(1)
+  c10 = c010*(1.0 - delta(1)) + c110*delta(1)
+  c11 = c011*(1.0 - delta(1)) + c111*delta(1)
+
+  c0 = c00*(1.0 - delta(2)) + c10*delta(2)
+  c1 = c01*(1.0 - delta(2)) + c11*delta(2)
+  
+  c = c0*(1.0 - delta(3)) + c1*delta(3)
+
+  deallocate(c000, c100, c010, c110, c001, c101, c011, c111, c00, c01, c10, c11, c0, c1)
+
+end subroutine trilinear_parallelc
+
+subroutine  trilinear_parallel_commut(kptns,ik,sh,c)
+  USE kinds, ONLY : DP 
+  USE simple_ip_objects
+
+  implicit none
+
+  TYPE(kpoints) :: kptns
+  TYPE(shirley) :: sh
+  INTEGER, INTENT(in) :: ik ! dense local k-grid index
+  COMPLEX(kind=DP), INTENT(out) :: c(sh%ntot_e,sh%ntot_e,3)  ! Trilinear interpolated commutator
+  COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: c000, c100, c010, c110, c001, c101, c011, c111
+  COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: c00, c01, c10, c11, c0, c1
+  REAL(kind=DP) ::  delta(3)
+
+  allocate(c000(sh%ntot_e,sh%ntot_e,3),c100(sh%ntot_e,sh%ntot_e,3),&
+  & c010(sh%ntot_e,sh%ntot_e,3),c110(sh%ntot_e,sh%ntot_e,3), &
+  & c001(sh%ntot_e,sh%ntot_e,3), c101(sh%ntot_e,sh%ntot_e,3), &
+  & c011(sh%ntot_e,sh%ntot_e,3), c111(sh%ntot_e,sh%ntot_e,3) )
+
+  allocate(c00(sh%ntot_e,sh%ntot_e,3),c01(sh%ntot_e,sh%ntot_e,3),&
+  & c10(sh%ntot_e,sh%ntot_e,3),c11(sh%ntot_e,sh%ntot_e,3), &
+  & c0(sh%ntot_e,sh%ntot_e,3), c1(sh%ntot_e,sh%ntot_e,3))
+ 
+  c000(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(1,ik))
+  c100(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(2,ik))
+  c010(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(3,ik))
+  c110(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(4,ik))
+  c001(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(5,ik))
+  c101(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(6,ik))
+  c011(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(7,ik))
+  c111(1:sh%ntot_e,1:sh%ntot_e,1:3) = sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3, kptns%coord_cube(8,ik))
+     
+  delta(1:3) = kptns%pos_cube(1:3,ik)
+  
+  c00 = c000*(1.0 - delta(1)) + c100*delta(1)
+  c01 = c001*(1.0 - delta(1)) + c101*delta(1)
+  c10 = c010*(1.0 - delta(1)) + c110*delta(1)
+  c11 = c011*(1.0 - delta(1)) + c111*delta(1)
+
+  c0 = c00*(1.0 - delta(2)) + c10*delta(2)
+  c1 = c01*(1.0 - delta(2)) + c11*delta(2)
+  
+  c = c0*(1.0 - delta(3)) + c1*delta(3)
+
+  deallocate(c000, c100, c010, c110, c001, c101, c011, c111, c00, c01, c10, c11, c0, c1)
+
+end subroutine trilinear_parallel_commut
+
+
+
+! OLD ROUTINE (to be deleted)
+subroutine  trilinear(q,nkpoints,bg,at,ndim,beck,c)   
+  USE kinds, ONLY : DP 
+  USE constants, ONLY : pi   ! DEBUG
+  USE io_global, ONLY : stdout ! DEBUG
+
+  implicit none
+
+  REAL(kind=DP), INTENT(in) :: bg(3,3) ! reciprocal-space basis vectors
+  REAL(kind=DP), INTENT(in) :: at(3,3) ! real-space basis vectors
+  REAL(kind=DP), INTENT(in) :: q(3)  ! k-point
+  INTEGER, INTENT(in) :: nkpoints(3) ! interpolation grid
+  INTEGER, INTENT(in) :: ndim
+  INTEGER :: n(3), np(3), i, j
+  COMPLEX(kind=DP), INTENT(in) :: beck(ndim,nkpoints(1)*nkpoints(2)*nkpoints(3)) ! Projector 
+  COMPLEX(kind=DP), DIMENSION(ndim), INTENT(out) :: c  ! Trilinear interpolated projector
+
+  COMPLEX(kind=DP), DIMENSION(ndim) :: c000, c100, c010, c110, c001, c101, c011, c111 
+  COMPLEX(kind=DP), DIMENSION(ndim) :: c00, c01, c10, c11, c0, c1
+  REAL(kind=DP) :: qproj(3), delta(3)
+
+
+  ! Project q on the bg basis (it is obtained doing the scalar product of q with the direct lattice vectors at)
+  qproj(1:3) = 0
+  do i=1,3
+     do j=1,3
+        qproj(i) = qproj(i) + q(j)*at(j,i) 
+     enddo
+  enddo
+  
+  n(1:3) = 0
+  do i=1,3
+     n(i) = int(qproj(i)*nkpoints(i))
+     np(i) = n(i) + 1
+     if ( np(i) >= nkpoints(i) )  np(i) = 0
+  enddo   
+  
+  c000(1:ndim) = beck(1:ndim, n(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + n(3) + 1)
+  c100(1:ndim) = beck(1:ndim, np(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + n(3) + 1)
+  c010(1:ndim) = beck(1:ndim, n(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + n(3) + 1)
+  c110(1:ndim) = beck(1:ndim, np(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + n(3) + 1)
+  c001(1:ndim) = beck(1:ndim, n(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + np(3) + 1)
+  c101(1:ndim) = beck(1:ndim, np(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + np(3) + 1)
+  c011(1:ndim) = beck(1:ndim, n(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + np(3) + 1)
+  c111(1:ndim) = beck(1:ndim, np(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + np(3) + 1)
+  
+  do i=1,3
+     delta(i) = ( qproj(i) - dble(n(i))/dble(nkpoints(i)) ) * dble(nkpoints(i)) !  qproj(i) --> q(i)
+  enddo
+
+  c00 = c000*(1.0 - delta(1)) + c100*delta(1)
+  c01 = c001*(1.0 - delta(1)) + c101*delta(1)
+  c10 = c010*(1.0 - delta(1)) + c110*delta(1)
+  c11 = c011*(1.0 - delta(1)) + c111*delta(1)
+
+  c0 = c00*(1.0 - delta(2)) + c10*delta(2)
+  c1 = c01*(1.0 - delta(2)) + c11*delta(2)
+  
+  c = c0*(1.0 - delta(3)) + c1*delta(3)
+
+  ! DEBUG
+!  write(stdout, *) '**********************INTERPOLATION*************************'
+!  write(stdout, *) 'qproj', qproj(1:3)
+!
+!  write(stdout, *) 'n', n(1:3)
+!  write(stdout, *) 'np', np(1:3)
+!  write(stdout, *) n(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + n(3) + 1
+!  write(stdout, *) np(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + n(3) + 1
+!  write(stdout, *) n(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + n(3) + 1
+!  write(stdout, *) np(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + n(3) + 1
+!  write(stdout, *) n(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + np(3) + 1
+!  write(stdout, *) np(1)*nkpoints(2)*nkpoints(3) + n(2)*nkpoints(3) + np(3) + 1
+!  write(stdout, *) n(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + np(3) + 1
+!  write(stdout, *) np(1)*nkpoints(2)*nkpoints(3) + np(2)*nkpoints(3) + np(3) + 1
+!
+!  write(stdout, *) 'delta', delta(1:3)
+!
+!  write(stdout, *) 'c000', c000(1:1)
+!  write(stdout, *) 'c100', c100(1:1)
+!  write(stdout, *) 'c010', c010(1:1)
+!  write(stdout, *) 'c110', c110(1:1)
+!  write(stdout, *) 'c001', c001(1:1)
+!  write(stdout, *) 'c101', c101(1:1)
+!  write(stdout, *) 'c011', c011(1:1)
+!  write(stdout, *) 'c111', c111(1:1)
+!
+!  write(stdout, *) 'c00', c00(1:1)
+!  write(stdout, *) 'c01', c01(1:1)
+!  write(stdout, *) 'c10', c10(1:1)
+!  write(stdout, *) 'c11', c11(1:1)
+!  write(stdout, *) 'c0', c0(1:1)
+!  write(stdout, *) 'c1', c1(1:1)
+!  write(stdout, *) 'c', c(1:1)
+  ! DEBUG
+
+
+  return
+end subroutine trilinear
+
+
+
diff --git a/GWW/simple_ip/make.depend b/GWW/simple_ip/make.depend
new file mode 100644
index 000000000..f3ce188a7
--- /dev/null
+++ b/GWW/simple_ip/make.depend
@@ -0,0 +1,37 @@
+diagonalization.o : ../../Modules/constants.o
+diagonalization.o : ../../Modules/io_global.o
+diagonalization.o : ../../Modules/kind.o
+diagonalization.o : input_simple_ip.o
+diagonalization.o : simple_ip_objects.o
+dielectric.o : ../../Modules/constants.o
+dielectric.o : ../../Modules/io_global.o
+dielectric.o : ../../Modules/kind.o
+dielectric.o : ../../Modules/mp_world.o
+dielectric.o : ../../UtilXlib/mp.o
+dielectric.o : input_simple_ip.o
+dielectric.o : simple_ip_objects.o
+dielectric.o : tetra_mod1.o
+input_simple_ip.o : ../../Modules/io_files.o
+input_simple_ip.o : ../../Modules/io_global.o
+input_simple_ip.o : ../../Modules/kind.o
+input_simple_ip.o : ../../Modules/mp_world.o
+input_simple_ip.o : ../../UtilXlib/mp.o
+interpolation.o : ../../Modules/constants.o
+interpolation.o : ../../Modules/io_global.o
+interpolation.o : ../../Modules/kind.o
+interpolation.o : simple_ip_objects.o
+simple_ip.o : input_simple_ip.o
+simple_ip.o : simple_ip_objects.o
+simple_ip.o : start_end.o
+simple_ip_objects.o : ../../Modules/io_files.o
+simple_ip_objects.o : ../../Modules/io_global.o
+simple_ip_objects.o : ../../Modules/kind.o
+simple_ip_objects.o : ../../Modules/mp_world.o
+simple_ip_objects.o : ../../UtilXlib/mp.o
+simple_ip_objects.o : input_simple_ip.o
+start_end.o : ../../Modules/environment.o
+start_end.o : ../../Modules/io_global.o
+start_end.o : ../../Modules/mp_global.o
+start_end.o : ../../Modules/mp_world.o
+tetra_mod1.o : ../../Modules/kind.o
+tetra_mod2.o : ../../Modules/kind.o
diff --git a/GWW/simple_ip/simple_ip.f90 b/GWW/simple_ip/simple_ip.f90
new file mode 100644
index 000000000..02910527c
--- /dev/null
+++ b/GWW/simple_ip/simple_ip.f90
@@ -0,0 +1,56 @@
+program simple_ip
+  !
+  USE start_end
+  USE simple_ip_objects
+  USE input_simple_ip
+  !
+  implicit none
+  !
+  TYPE(input_options_simple_ip) :: din
+  TYPE(shirley) :: sh
+  TYPE(kpoints) :: k
+  TYPE(energies) :: e
+  !
+  call initialize_shirley(sh)
+  call initialize_kpoints(k)
+  call initialize_energies(e)
+  !
+  !setup MPI environment
+  call startup
+  !
+  call start_clock('simple_ip')
+  call start_clock('init (read)')
+  !
+  call read_input_simple_ip( din )  
+  call read_shirley(din,sh) 
+  call kgrid_creation(din,k,sh)
+  call create_energies(sh,k,e)
+  !
+  if (din%nonlocal_interpolation) then
+    call read_shirley_k_interp(din,sh,e,k)  ! trilinear interpolation
+  else
+    call read_shirley_k(din,sh,e)
+  endif
+  !
+  call stop_clock('init (read)')
+  !
+  ! calculate IP dielectric function (interband + intraband)
+  call dielectric(sh,din,k,e)
+  !
+  call stop_run
+  call deallocate_shirley(sh)
+  call deallocate_kpoints(k)
+  call deallocate_energies(e)
+  !
+  call stop_clock('simple_ip')
+  call print_clock('init (read)')
+  call print_clock('diagonalization')
+  call print_clock('diago_vnloc')
+  call print_clock('diago_zheevx')
+  call print_clock('optic_elements')
+  call print_clock('dielectric')
+  call print_clock('simple_ip')
+  stop
+  !
+end program simple_ip
+
diff --git a/GWW/simple_ip/simple_ip_objects.f90 b/GWW/simple_ip/simple_ip_objects.f90
new file mode 100644
index 000000000..051641d38
--- /dev/null
+++ b/GWW/simple_ip/simple_ip_objects.f90
@@ -0,0 +1,656 @@
+MODULE simple_ip_objects
+  ! this module describes the most important objects
+  !
+  USE kinds, ONLY : DP
+  !
+  TYPE energies
+     INTEGER :: nk!total number of k points
+     INTEGER :: nk_loc!local number of k points
+     INTEGER :: ik_first!first local k point
+     INTEGER :: ik_last!last local k point  
+     INTEGER :: num_bands   ! number of states
+     REAL(kind=DP), DIMENSION(:,:), POINTER :: energy ! energies (num_bands,nk_loc)
+     REAL(kind=DP), DIMENSION(:,:,:), POINTER :: energy_der ! derivatives of the energy  (3,num_bands,nk_loc)
+  END TYPE
+  !
+  TYPE kpoints
+     INTEGER :: nkgrid(3)         !  k-grid for interpolation (interp_grid)
+     REAL(kind=DP), DIMENSION(:,:), POINTER :: qk ! kpoints coordinates (3,nkgrid(1)*nkgrid(2)*nkgrid(3))
+     REAL(kind=DP) :: alat        ! lattice paramater 
+     REAL(kind=DP) :: bg(3,3)     ! reciprocal basis vectors
+     INTEGER :: nk_loc            ! local number of k points
+     INTEGER :: ik_first          ! first local k point
+     INTEGER :: ik_last           ! last local k point  
+     INTEGER :: nk_smooth_loc     ! number of local k-points
+     INTEGER :: nk                ! total number of k-points
+     REAL(kind=DP), DIMENSION(:,:), POINTER  ::  pos_cube   !   (3,nk_loc) 
+     INTEGER, DIMENSION(:,:), POINTER :: coord_cube   ! (8,nk_loc)
+  END TYPE
+  !
+  TYPE shirley
+      INTEGER :: ntot_e ! number of optimal basis states
+      LOGICAL :: noncolin ! if it is non-collinear
+      INTEGER :: nat ! number of atoms
+      INTEGER :: ntyp ! number of types of atoms
+      INTEGER :: nhm  ! max number of different beta functions per atom
+      INTEGER :: nspin  ! number of spins (1=no spin, 2=LSDA)
+      INTEGER :: nkb  ! total number of beta functions
+      INTEGER :: npol  !
+      INTEGER :: nks ! number of k-points of the smooth grid
+      INTEGER :: nk_smooth_loc ! number of local k-points
+      INTEGER :: num_val , num_cond  ! number of interpolated valence bands, number of interpolated conduction bands
+      INTEGER :: num_nbndv(2)       ! total number of occupied bands
+      INTEGER :: num_bands       ! total number of considered bands (num_bands = num_val + num_cond)
+      LOGICAL :: nonlocal_commutator  !
+      INTEGER, DIMENSION(3) :: nkpoints     ! smooth k-points grid on which H(k) is calculated
+      INTEGER, DIMENSION(:), POINTER :: ityp ! (nat)
+      INTEGER, DIMENSION(:), POINTER :: nh   ! (ntyp)
+      INTEGER, DIMENSION(:), POINTER :: indv_ijkb0 ! (nat)
+      REAL(kind=8) :: alat, nelec, omega        ! lattice paramater, number of electrons, volume prim. cell
+      REAL(kind=8) :: bg(3,3)     ! reciprocal basis vectors
+      REAL(kind=8) :: at(3,3)     ! direct basis vectors
+      REAL(kind=DP) :: s_bands     ! threshold for the construction of the optimal basis
+      COMPLEX(kind=DP), DIMENSION(:,:), POINTER ::  h0 , Vloc ! k-dependent Hamiltonian  (ntot_e,ntot_e)
+      COMPLEX(kind=DP), DIMENSION(:,:,:), POINTER :: h1 ! (ntot_e,ntot_e,3)
+      COMPLEX(kind=DP), DIMENSION(:,:,:,:), POINTER :: deeq_nc  ! (nhm,nhm,nat,nspin)
+      COMPLEX(kind=DP), DIMENSION(:,:,:), POINTER :: deeqc !(nhm,nhm,nat)
+      COMPLEX(kind=DP), DIMENSION(:,:,:,:), POINTER :: beck_nc ! (nkb,npol,ntot_e,nk)
+      COMPLEX(kind=DP), DIMENSION(:,:,:), POINTER :: beckc ! (nkb,ntot_e,nk)
+      COMPLEX(kind=DP),DIMENSION(:,:,:,:), POINTER :: commut ! (ntot_e,ntot_e,3,nk)
+  END TYPE
+  !
+  TYPE eigen
+    INTEGER :: num_bands       ! number of states
+    REAL(kind=DP) :: q(3)        ! k-point
+    REAL(kind=DP), DIMENSION(:), POINTER  :: energy     ! eigenenergies (num_bands)
+    COMPLEX(kind=DP), DIMENSION(:,:), POINTER  :: wave_func      ! eigenfunctions (ntot_e,num_bands)
+  END TYPE
+
+   CONTAINS
+
+   subroutine initialize_energies(element)
+      implicit none
+      TYPE(energies) :: element
+      nullify(element%energy)
+      nullify(element%energy_der)
+      return
+    end subroutine initialize_energies
+    
+    subroutine deallocate_energies(element)
+      implicit none
+      TYPE(energies) :: element
+      if(associated(element%energy)) deallocate(element%energy)
+      nullify(element%energy)
+      if(associated(element%energy_der)) deallocate(element%energy_der)
+      nullify(element%energy_der)
+      return
+    end subroutine deallocate_energies
+
+ 
+   subroutine initialize_kpoints(element)
+      implicit none
+      TYPE(kpoints) :: element
+      nullify(element%qk)
+      nullify(element%pos_cube)
+      nullify(element%coord_cube)
+      return
+    end subroutine initialize_kpoints
+    
+    SUBROUTINE deallocate_kpoints(element)
+      implicit none
+      TYPE(kpoints) :: element
+      if(associated(element%qk)) deallocate(element%qk)
+      nullify(element%qk)
+      if(associated(element%pos_cube)) deallocate(element%pos_cube)
+      nullify(element%pos_cube)
+      if(associated(element%coord_cube)) deallocate(element%coord_cube)
+      nullify(element%coord_cube)
+      return
+    END SUBROUTINE deallocate_kpoints
+
+    SUBROUTINE initialize_shirley(element)
+      implicit none
+      TYPE(shirley) :: element
+      nullify(element%ityp)
+      nullify(element%nh)
+      nullify(element%indv_ijkb0)
+      nullify(element%h0)
+      nullify(element%h1)
+      nullify(element%Vloc)
+      nullify(element%deeq_nc)
+      nullify(element%deeqc)
+      nullify(element%beck_nc)
+      nullify(element%beckc)
+      nullify(element%commut)
+      return
+    END SUBROUTINE initialize_shirley
+    
+    SUBROUTINE deallocate_shirley(element)
+      implicit none
+      TYPE(shirley) :: element
+      if(associated(element%ityp)) deallocate(element%ityp)
+      nullify(element%ityp)
+      if(associated(element%nh)) deallocate(element%nh)
+      nullify(element%nh)
+      if(associated(element%indv_ijkb0)) deallocate(element%indv_ijkb0)
+      nullify(element%indv_ijkb0)
+      if(associated(element%h0)) deallocate(element%h0)
+      nullify(element%h0)
+      if(associated(element%h1)) deallocate(element%h1)
+      nullify(element%h1)
+      if(associated(element%Vloc)) deallocate(element%Vloc)
+      nullify(element%Vloc)
+      if(associated(element%deeq_nc)) deallocate(element%deeq_nc)
+      nullify(element%deeq_nc)
+      if(associated(element%deeqc)) deallocate(element%deeqc)
+      nullify(element%deeqc)
+      if(associated(element%beck_nc)) deallocate(element%beck_nc)
+      nullify(element%beck_nc)
+      if(associated(element%beckc)) deallocate(element%beckc)
+      nullify(element%beckc)
+      if(associated(element%commut)) deallocate(element%commut)
+      nullify(element%commut)
+      return
+    END SUBROUTINE deallocate_shirley
+
+   SUBROUTINE initialize_eigen(element)
+      implicit none
+      TYPE(eigen) :: element
+      nullify(element%energy)
+      nullify(element%wave_func)
+      return
+    END SUBROUTINE initialize_eigen
+    
+    SUBROUTINE deallocate_eigen(element)
+      implicit none
+      TYPE(eigen) :: element
+      if(associated(element%energy)) deallocate(element%energy)
+      nullify(element%energy)
+      if(associated(element%wave_func)) deallocate(element%wave_func)
+      nullify(element%wave_func)
+      return
+    END SUBROUTINE deallocate_eigen
+
+
+
+    SUBROUTINE read_shirley(simpleip_in,sh)
+      USE input_simple_ip, ONLY : input_options_simple_ip
+      USE mp,                   ONLY : mp_bcast
+      USE mp_world,             ONLY : world_comm
+      USE io_files,  ONLY : tmp_dir
+      USE io_global, ONLY : ionode_id, ionode, stdout
+
+      implicit none
+
+      TYPE(input_options_simple_ip) :: simpleip_in
+      TYPE(shirley) :: sh
+      INTEGER, EXTERNAL :: find_free_unit
+      INTEGER :: iun, idir, nk
+      write(stdout,*)'simple_ip: opening file "hamiltonian"'
+      if(ionode) then
+         iun = find_free_unit()
+         open( unit=iun, file=trim(tmp_dir)//trim(simpleip_in%prefix)//'.hamiltonian', status='old',form='unformatted')
+         write(stdout,*)'File opened'
+         read(iun) sh%ntot_e
+      endif   
+      call mp_bcast(sh%ntot_e,ionode_id,world_comm)
+      allocate(sh%h0(sh%ntot_e,sh%ntot_e), sh%h1(sh%ntot_e,sh%ntot_e,3), sh%Vloc(sh%ntot_e,sh%ntot_e))
+
+      if (ionode) read(iun) sh%h0(1:sh%ntot_e,1:sh%ntot_e) 
+
+      do idir = 1,3
+        if (ionode) read(iun) sh%h1(1:sh%ntot_e,1:sh%ntot_e,idir) 
+      enddo
+
+      if (ionode) read(iun) sh%Vloc(1:sh%ntot_e,1:sh%ntot_e) 
+
+      call mp_bcast(sh%h0,ionode_id,world_comm)
+      call mp_bcast(sh%h1,ionode_id,world_comm)
+      call mp_bcast(sh%Vloc,ionode_id,world_comm)
+      
+      if(ionode) then
+        read(iun) sh%noncolin
+        read(iun) sh%nat 
+        read(iun) sh%ntyp 
+        read(iun) sh%nhm
+        read(iun) sh%nspin
+        read(iun) sh%nkb 
+        read(iun) sh%npol
+      endif
+      call mp_bcast(sh%noncolin,ionode_id,world_comm)
+      call mp_bcast(sh%nat,ionode_id,world_comm)
+      call mp_bcast(sh%ntyp,ionode_id,world_comm)
+      call mp_bcast(sh%nhm,ionode_id,world_comm)
+      call mp_bcast(sh%nspin,ionode_id,world_comm)
+      call mp_bcast(sh%nkb,ionode_id,world_comm)
+      call mp_bcast(sh%npol,ionode_id,world_comm)
+      
+
+      allocate(sh%ityp(sh%nat), sh%nh(sh%ntyp), sh%indv_ijkb0(sh%nat))
+      if(ionode) then
+        read(iun) sh%ityp(1:sh%nat)
+        read(iun) sh%nh(1:sh%ntyp)
+        read(iun) sh%indv_ijkb0(1:sh%nat)
+        read(iun) sh%nkpoints
+      endif
+      call mp_bcast(sh%ityp,ionode_id,world_comm)
+      call mp_bcast(sh%nh,ionode_id,world_comm)
+      call mp_bcast(sh%indv_ijkb0,ionode_id,world_comm)
+      call mp_bcast(sh%nkpoints,ionode_id,world_comm)
+      
+      nk = (sh%nkpoints(1))*(sh%nkpoints(2))*(sh%nkpoints(3))
+
+      allocate(sh%deeqc(sh%nhm,sh%nhm,sh%nat), sh%deeq_nc(sh%nhm,sh%nhm,sh%nat,sh%nspin))
+      if (sh%noncolin) then
+         if (ionode) read(iun) sh%deeq_nc(1:sh%nhm,1:sh%nhm,1:sh%nat,1:sh%nspin)
+      else
+         if (ionode) read(iun) sh%deeqc(1:sh%nhm,1:sh%nhm,1:sh%nat)
+      endif
+      
+      if (sh%noncolin) then
+         call mp_bcast(sh%deeq_nc,ionode_id,world_comm)
+      else
+         call mp_bcast(sh%deeqc,ionode_id,world_comm)
+      endif
+            
+      if(ionode) then
+         read(iun) sh%alat
+         read(iun) sh%bg(1:3,1:3)
+         read(iun) sh%at(1:3,1:3)
+         read(iun) sh%nelec
+         read(iun) sh%omega
+         read(iun) sh%num_val
+         read(iun) sh%num_cond
+         read(iun) sh%num_nbndv
+         read(iun) sh%nonlocal_commutator
+         read(iun) sh%s_bands
+      endif
+      sh%num_bands = sh%num_val + sh%num_cond
+      call mp_bcast(sh%alat,ionode_id,world_comm)
+      call mp_bcast(sh%bg,ionode_id,world_comm)
+      call mp_bcast(sh%at,ionode_id,world_comm)
+      call mp_bcast(sh%nelec,ionode_id,world_comm)
+      call mp_bcast(sh%omega,ionode_id,world_comm)
+      call mp_bcast(sh%num_val,ionode_id,world_comm)
+      call mp_bcast(sh%num_cond,ionode_id,world_comm)
+      call mp_bcast(sh%num_nbndv,ionode_id,world_comm)
+      call mp_bcast(sh%num_bands,ionode_id,world_comm) 
+      call mp_bcast(sh%nonlocal_commutator,ionode_id,world_comm)
+      call mp_bcast(sh%s_bands,ionode_id,world_comm)
+      !
+      if(ionode) then
+         close(iun)
+         write(stdout,*)'File closed'
+      endif
+      !
+      if (.not. simpleip_in%nonlocal_interpolation .and. ( sh%nkpoints(1) /= simpleip_in%interp_grid(1) .or. &
+      & sh%nkpoints(2) /= simpleip_in%interp_grid(2) .or. sh%nkpoints(3) /= simpleip_in%interp_grid(3) ) ) then
+        call errore('SIMPLE_IP', 'W/o trilinear interpolation, k-grids from simple and simple_ip have to be equal',1)
+      elseif ( simpleip_in%nonlocal_interpolation .and. ( 2*sh%nkpoints(1) /= simpleip_in%interp_grid(1) .or. &
+      & 2*sh%nkpoints(2) /= simpleip_in%interp_grid(2) .or. 2*sh%nkpoints(3) /= simpleip_in%interp_grid(3) ) ) then
+        call errore('SIMPLE_IP', 'W/ trilinear interpolation, simple_ip k-grid has to be the double of the simple k-grid',1)
+      endif
+      if( (sh%num_val .ne. sh%num_nbndv(1)) ) then
+        call errore('SIMPLE_IP', 'All the occupied bands must be included for Shirley interpolation (num_val=num_nbndv)',1)
+      endif
+      !
+    end subroutine read_shirley
+
+
+    SUBROUTINE kgrid_creation(simpleip_in,kgrid, sh)
+      USE input_simple_ip, ONLY : input_options_simple_ip
+      USE mp_world,  ONLY : mpime, nproc
+      !
+      implicit none
+      !
+      TYPE(input_options_simple_ip) :: simpleip_in
+      TYPE(kpoints) :: kgrid
+      TYPE(shirley) :: sh
+      INTEGER ::  i, j, k, ii
+      INTEGER :: l_blk
+      !
+      ! WARNING: this must be the same as in create_energies
+      kgrid%nkgrid(1:3) = simpleip_in%interp_grid(1:3)
+      kgrid%nk = (kgrid%nkgrid(1))*(kgrid%nkgrid(2))*(kgrid%nkgrid(3))
+      kgrid%alat = sh%alat
+      kgrid%bg(1:3,1:3) = sh%bg(1:3,1:3)
+      
+      l_blk=kgrid%nk/nproc
+      if(l_blk*nproc<kgrid%nk) l_blk=l_blk+1
+
+      if(l_blk*mpime+1 <= kgrid%nk) then
+         kgrid%ik_first=l_blk*mpime+1 
+         kgrid%ik_last=kgrid%ik_first+l_blk-1
+         if(kgrid%ik_last>kgrid%nk) kgrid%ik_last=kgrid%nk
+         kgrid%nk_loc=kgrid%ik_last-kgrid%ik_first+1
+      else
+         kgrid%nk_loc=0
+         kgrid%ik_first=0
+         kgrid%ik_last=-1
+      endif     
+      
+      allocate(kgrid%qk(3,kgrid%nk))
+      ! Uniform grid in [0,1)*bg --> it goes outside first BZ
+      ii = 0
+      do i=0,kgrid%nkgrid(1)-1
+       do j=0,kgrid%nkgrid(2)-1
+        do k=0,kgrid%nkgrid(3)-1
+           ii = ii + 1
+           kgrid%qk(1:3,ii) = kgrid%bg(1:3,1)*dble(i)/dble(kgrid%nkgrid(1)) + &
+        & kgrid%bg(1:3,2)*dble(j)/dble(kgrid%nkgrid(2)) + kgrid%bg(1:3,3)*dble(k)/dble(kgrid%nkgrid(3))
+        enddo
+       enddo
+      enddo
+      !
+    END SUBROUTINE kgrid_creation
+
+
+
+    SUBROUTINE create_energies(sh,kgrid,ene)
+      USE mp_world,  ONLY : mpime, nproc
+      implicit none
+      
+      TYPE(shirley) :: sh
+      TYPE(kpoints) :: kgrid
+      TYPE(energies) :: ene
+      INTEGER :: l_blk
+      
+      ene%nk = (kgrid%nkgrid(1))*(kgrid%nkgrid(2))*(kgrid%nkgrid(3))
+      ene%num_bands = sh%num_bands
+
+      l_blk=ene%nk/nproc
+      if(l_blk*nproc<ene%nk) l_blk=l_blk+1
+
+      if(l_blk*mpime+1 <= ene%nk) then
+         ene%ik_first=l_blk*mpime+1 
+         ene%ik_last=ene%ik_first+l_blk-1
+         if(ene%ik_last>ene%nk) ene%ik_last=ene%nk
+         ene%nk_loc=ene%ik_last-ene%ik_first+1
+      else
+         ene%nk_loc=0
+         ene%ik_first=0
+         ene%ik_last=-1
+      endif
+      
+      allocate(ene%energy(ene%num_bands,ene%nk_loc), ene%energy_der(3,ene%num_bands,ene%nk_loc))
+
+    END SUBROUTINE create_energies
+
+
+
+
+    SUBROUTINE read_shirley_k(simpleip_in,sh,ene)
+      USE input_simple_ip, ONLY : input_options_simple_ip
+      USE mp,                   ONLY : mp_bcast, mp_barrier
+      USE mp_world,             ONLY : world_comm
+      USE io_files,  ONLY : tmp_dir
+      USE io_global, ONLY : ionode_id, ionode, stdout
+      !
+      IMPLICIT NONE
+      !
+      TYPE(input_options_simple_ip) :: simpleip_in
+      TYPE(shirley) :: sh
+      TYPE(energies) :: ene
+      !
+      INTEGER, EXTERNAL :: find_free_unit
+      INTEGER :: iun, ll, ii , jj , kk
+      COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: sum_beckc_tmp 
+      COMPLEX(kind=DP), DIMENSION(:,:,:,:), ALLOCATABLE :: sum_beck_nc_tmp , sum_commut_tmp 
+      !
+      write(stdout,*)'simple_ip: opening file "hamiltonian_k"'
+      if(ionode) then
+         iun = find_free_unit()
+         open( unit=iun, file=trim(tmp_dir)//trim(simpleip_in%prefix)//'.hamiltonian_k', status='old',form='unformatted')
+         write(stdout,*)'File opened'
+      endif   
+      !
+      if (sh%noncolin) then
+        allocate(sum_beck_nc_tmp(sh%nkb,sh%npol,sh%ntot_e,sh%nkpoints(3)))
+        allocate(sh%beck_nc(sh%nkb,sh%npol,sh%ntot_e,ene%nk_loc))
+      else
+        allocate(sum_beckc_tmp(sh%nkb,sh%ntot_e,sh%nkpoints(3)))
+        allocate(sh%beckc(sh%nkb,sh%ntot_e,ene%nk_loc))
+      endif
+      !
+      if (sh%nonlocal_commutator) then
+        allocate(sum_commut_tmp(sh%ntot_e,sh%ntot_e,3,sh%nkpoints(3)))
+        allocate(sh%commut(sh%ntot_e,sh%ntot_e,3,ene%nk_loc))
+      endif
+      ! 
+      write(stdout,*) 'Total number of k-point blocks (for reading): ' , sh%nkpoints(1)*sh%nkpoints(2)
+      do ii=1,sh%nkpoints(1)
+        do jj=1,sh%nkpoints(2)
+        ! 
+        write(stdout,*) 'k-point block: ' , jj  + (ii - 1)*sh%nkpoints(2)
+        if (sh%noncolin) then
+          if (ionode) read(iun) sum_beck_nc_tmp(1:sh%nkb,1:sh%npol,1:sh%ntot_e,1:sh%nkpoints(3))
+        else
+          if (ionode) read(iun) sum_beckc_tmp(1:sh%nkb,1:sh%ntot_e,1:sh%nkpoints(3))
+        endif
+        !
+        if (sh%nonlocal_commutator) then
+          if (ionode) read(iun) sum_commut_tmp(1:sh%ntot_e,1:sh%ntot_e,1:3,1:sh%nkpoints(3))  ! read commutator matrix
+        endif
+        !
+        if (sh%noncolin) then
+          call mp_bcast(sum_beck_nc_tmp,ionode_id,world_comm)
+        else
+          call mp_bcast(sum_beckc_tmp,ionode_id,world_comm)
+        endif
+        !
+        if (sh%nonlocal_commutator) then
+          call mp_bcast(sum_commut_tmp,ionode_id,world_comm)
+        endif
+        !
+        do kk=1,sh%nkpoints(3)
+          !
+          ll = kk + (jj - 1)*sh%nkpoints(3) + (ii - 1)*sh%nkpoints(2)*sh%nkpoints(3)   ! global kindex
+          !
+          if (ene%ik_first <= ll .and. ll <= ene%ik_last) then
+            !
+            if (sh%noncolin) then
+              sh%beck_nc(1:sh%nkb, 1:sh%npol, 1:sh%ntot_e,ll - ene%ik_first + 1) = &
+              & sum_beck_nc_tmp(1:sh%nkb, 1:sh%npol, 1:sh%ntot_e, kk)
+            else
+              sh%beckc(1:sh%nkb, 1:sh%ntot_e, ll - ene%ik_first + 1) = &
+              & sum_beckc_tmp(1:sh%nkb,1:sh%ntot_e, kk)
+            endif
+            !
+            if (sh%nonlocal_commutator) then
+              sh%commut(1:sh%ntot_e, 1:sh%ntot_e, 1:3, ll - ene%ik_first + 1) = &
+              & sum_commut_tmp(1:sh%ntot_e, 1:sh%ntot_e, 1:3, kk)
+            endif
+          endif
+          ! 
+        enddo
+        !
+        enddo
+      enddo
+      !
+      if(ionode) then 
+        close(iun)
+        write(stdout,*)'File closed'
+      endif
+      !
+      if (sh%noncolin) then
+        deallocate(sum_beck_nc_tmp)
+      else
+        deallocate(sum_beckc_tmp)
+      endif
+      !
+      if (sh%nonlocal_commutator) then
+        deallocate(sum_commut_tmp)
+      endif
+      !
+    END SUBROUTINE read_shirley_k
+
+
+
+
+    !
+    SUBROUTINE read_shirley_k_interp(simpleip_in,sh,ene,k)
+      USE input_simple_ip, ONLY : input_options_simple_ip
+      USE mp,                   ONLY : mp_bcast, mp_barrier
+      USE mp_world,             ONLY : world_comm
+      USE io_files,  ONLY : tmp_dir
+      USE io_global, ONLY : ionode_id, ionode, stdout
+   
+      implicit none
+
+      TYPE(input_options_simple_ip) :: simpleip_in
+      TYPE(shirley) :: sh
+      TYPE(energies) :: ene
+      TYPE(kpoints)  :: k
+      REAL(kind=DP) :: qproj(3), q(3)
+      INTEGER :: i,j,ik, n(3), np(3), ii, jj , kk , ll , counter, nk_smooth, iun
+      INTEGER, EXTERNAL :: find_free_unit
+      INTEGER, DIMENSION(:,:), ALLOCATABLE :: coord_cube_global
+      INTEGER, DIMENSION(:), ALLOCATABLE  :: diff_kpoints    
+      COMPLEX(kind=DP), DIMENSION(:,:,:), ALLOCATABLE :: sum_beckc_tmp 
+      COMPLEX(kind=DP), DIMENSION(:,:,:,:), ALLOCATABLE :: sum_beck_nc_tmp , sum_commut_tmp   
+
+      nk_smooth = sh%nkpoints(1)*sh%nkpoints(2)*sh%nkpoints(3)  ! initial k-grid given to simple.x
+
+      allocate(k%pos_cube(3,k%nk_loc),k%coord_cube(8,k%nk_loc))
+      allocate(coord_cube_global(8,k%nk_loc),diff_kpoints(nk_smooth))
+
+      diff_kpoints = 0
+      coord_cube_global = 0
+      do ik = 1,k%nk_loc
+         q(1:3) = k%qk(1:3, ik + k%ik_first - 1)
+
+        ! Project q on the bg basis (it is obtained doing the scalar product of q with the direct lattice vectors at)
+        qproj(1:3) = 0
+        do i=1,3
+          do j=1,3
+            qproj(i) = qproj(i) + q(j)*sh%at(j,i) 
+          enddo
+        enddo
+  
+        n(1:3) = 0
+        do i=1,3
+          n(i) = int(qproj(i)*sh%nkpoints(i))
+          np(i) = n(i) + 1
+          !if ( np(i) >= sh%nkpoints(i) )  np(i) = 0
+          ! WARNING: When I reach the border of the grid I take the projector at the previous k-point.
+          if ( np(i) >= sh%nkpoints(i) ) np(i) = sh%nkpoints(i) - 1  
+        enddo   
+  
+        coord_cube_global(1,ik) = n(1)*sh%nkpoints(2)*sh%nkpoints(3) + n(2)*sh%nkpoints(3) + n(3) + 1
+        coord_cube_global(2,ik) = np(1)*sh%nkpoints(2)*sh%nkpoints(3) + n(2)*sh%nkpoints(3) + n(3) + 1
+        coord_cube_global(3,ik) = n(1)*sh%nkpoints(2)*sh%nkpoints(3) + np(2)*sh%nkpoints(3) + n(3) + 1
+        coord_cube_global(4,ik) = np(1)*sh%nkpoints(2)*sh%nkpoints(3) + np(2)*sh%nkpoints(3) + n(3) + 1
+        coord_cube_global(5,ik) = n(1)*sh%nkpoints(2)*sh%nkpoints(3) + n(2)*sh%nkpoints(3) + np(3) + 1
+        coord_cube_global(6,ik) = np(1)*sh%nkpoints(2)*sh%nkpoints(3) + n(2)*sh%nkpoints(3) + np(3) + 1
+        coord_cube_global(7,ik) = n(1)*sh%nkpoints(2)*sh%nkpoints(3) + np(2)*sh%nkpoints(3) + np(3) + 1
+        coord_cube_global(8,ik) = np(1)*sh%nkpoints(2)*sh%nkpoints(3) + np(2)*sh%nkpoints(3) + np(3) + 1
+
+        do i=1,3
+          k%pos_cube(i,ik) = ( qproj(i) - dble(n(i))/dble(sh%nkpoints(i)) ) * dble(sh%nkpoints(i)) !  qproj(i) --> q(i)
+        enddo
+
+        do ii =1,8
+          diff_kpoints(coord_cube_global(ii,ik)) = 1
+        enddo
+
+      enddo
+
+      k%nk_smooth_loc = sum(diff_kpoints)
+
+      if (sh%noncolin) then
+        allocate(sh%beck_nc(sh%nkb,sh%npol,sh%ntot_e,k%nk_smooth_loc)) ! (nkb,npol,ntot_e,nk)
+        allocate(sum_beck_nc_tmp(sh%nkb,sh%npol,sh%ntot_e,sh%nkpoints(3)))
+      else
+        allocate(sh%beckc(sh%nkb,sh%ntot_e,k%nk_smooth_loc))
+        allocate(sum_beckc_tmp(sh%nkb,sh%ntot_e,sh%nkpoints(3)))
+      endif
+      if (sh%nonlocal_commutator) then
+        allocate(sh%commut(sh%ntot_e,sh%ntot_e,3,k%nk_smooth_loc))
+        allocate(sum_commut_tmp(sh%ntot_e,sh%ntot_e,3,sh%nkpoints(3)))
+      endif
+
+      counter = 0
+      do ii = 1,nk_smooth
+        if (diff_kpoints(ii) /= 0) then
+          counter = counter + 1
+          diff_kpoints(ii) = counter
+        endif  
+      enddo
+
+      do ik = 1, k%nk_loc
+        do ii =1,8
+          k%coord_cube(ii,ik) = diff_kpoints(coord_cube_global(ii,ik))
+        enddo
+      enddo
+
+      write(stdout,*)'simple_ip: opening file "hamiltonian_k"'
+      if(ionode) then
+         iun = find_free_unit()
+         open( unit=iun, file=trim(tmp_dir)//trim(simpleip_in%prefix)//'.hamiltonian_k', status='old',form='unformatted')
+         write(stdout,*)'File opened'
+      endif   
+      !
+      write(stdout,*) 'Total number of k-point blocks: ' , sh%nkpoints(1)*sh%nkpoints(2)
+      do ii=1,sh%nkpoints(1)
+        do jj=1,sh%nkpoints(2)
+          ! 
+          write(stdout,*) 'k-point block: ' , jj  + (ii - 1)*sh%nkpoints(2)
+          if (sh%noncolin) then
+            if (ionode) read(iun) sum_beck_nc_tmp(1:sh%nkb,1:sh%npol,1:sh%ntot_e,1:sh%nkpoints(3))
+          else
+            if (ionode) read(iun) sum_beckc_tmp(1:sh%nkb,1:sh%ntot_e,1:sh%nkpoints(3))
+          endif
+          !
+          if (sh%nonlocal_commutator) then
+            if (ionode) read(iun) sum_commut_tmp(1:sh%ntot_e,1:sh%ntot_e,1:3,1:sh%nkpoints(3))  ! read commutator matrix
+          endif
+          !
+          if (sh%noncolin) then
+            call mp_bcast(sum_beck_nc_tmp,ionode_id,world_comm)
+          else
+            call mp_bcast(sum_beckc_tmp,ionode_id,world_comm)
+          endif
+          !
+          if (sh%nonlocal_commutator) then
+            call mp_bcast(sum_commut_tmp,ionode_id,world_comm)
+          endif
+          !
+          do kk=1,sh%nkpoints(3)
+            !
+            ll = kk + (jj - 1)*sh%nkpoints(3) + (ii - 1)*sh%nkpoints(2)*sh%nkpoints(3)   ! global kindex
+            !
+            if (diff_kpoints(ll) /= 0) then
+              if (sh%noncolin) then
+                sh%beck_nc(1:sh%nkb,1:sh%npol,1:sh%ntot_e,diff_kpoints(ll)) = sum_beck_nc_tmp(1:sh%nkb,1:sh%npol,1:sh%ntot_e,kk)
+              else
+                sh%beckc(1:sh%nkb,1:sh%ntot_e,diff_kpoints(ll)) = sum_beckc_tmp(1:sh%nkb,1:sh%ntot_e,kk)
+              endif
+              if (sh%nonlocal_commutator) then
+                  sh%commut(1:sh%ntot_e,1:sh%ntot_e,1:3,diff_kpoints(ll)) = sum_commut_tmp(1:sh%ntot_e,1:sh%ntot_e,1:3,kk) 
+              endif
+            endif      
+            ! 
+          enddo
+          !
+        enddo
+      enddo
+      
+      if(ionode) then 
+        close(iun)
+        write(stdout,*)'File closed'
+      endif
+
+      if (sh%noncolin) then
+        deallocate(sum_beck_nc_tmp) 
+      else
+        deallocate(sum_beckc_tmp)
+      endif
+      if (sh%nonlocal_commutator) then
+        deallocate(sum_commut_tmp)
+      endif
+      deallocate(coord_cube_global,diff_kpoints)
+
+    END SUBROUTINE read_shirley_k_interp
+
+
+END MODULE simple_ip_objects
diff --git a/GWW/simple_ip/start_end.f90 b/GWW/simple_ip/start_end.f90
new file mode 100644
index 000000000..c5208ec7f
--- /dev/null
+++ b/GWW/simple_ip/start_end.f90
@@ -0,0 +1,68 @@
+!
+! Copyright (C) 2001-2013 Quantum ESPRESSO group
+! This file is distributed under the terms of the
+! GNU General Public License. See the file `License'
+! in the root directory of the present distribution,
+! or http://www.gnu.org/copyleft/gpl.txt .
+!
+!
+
+
+MODULE start_end
+    !this module contains routines to initialize the MPI environment
+    IMPLICIT NONE
+    CHARACTER (len=10), PARAMETER :: code = 'SIMPLE_IP'
+#ifdef __OPENMP
+    INTEGER, SAVE :: ntids
+#endif
+
+CONTAINS
+
+  SUBROUTINE startup
+
+  !
+  USE io_global,  ONLY : stdout, ionode
+  USE mp_world,   ONLY : nproc
+  USE environment,           ONLY: environment_start
+  USE mp_global,  ONLY : mp_startup
+  
+  IMPLICIT NONE
+
+#ifdef __MPI
+  CALL mp_startup()
+#endif
+  
+  CALL environment_start ( code )
+  
+#ifdef __MPI
+  if(ionode) then
+     write(stdout,*) ''
+     write(stdout,*) 'MPI PARALLEL VERSION'
+     write(stdout,*) 'Number of procs: ', nproc
+     write(stdout,*)  'simple_ip: Version 1.00'
+  endif
+#else 
+   write(stdout,*)  'simple_ip: Version 1.00'
+#endif
+  return
+
+  END SUBROUTINE startup
+
+  SUBROUTINE stop_run
+!this subroutine kills the MPI environment
+
+    USE io_global,         ONLY : stdout, ionode
+    USE mp_global,         ONLY : mp_global_end
+
+    IMPLICIT NONE
+
+#ifdef __MPI
+    if(ionode) write(stdout,*) 'Stopping MPI environment'
+    call mp_global_end( )
+#endif
+
+    return
+  END SUBROUTINE stop_run
+
+
+END MODULE start_end
diff --git a/GWW/simple_ip/tetra_mod1.f90 b/GWW/simple_ip/tetra_mod1.f90
new file mode 100644
index 000000000..da73a3c49
--- /dev/null
+++ b/GWW/simple_ip/tetra_mod1.f90
@@ -0,0 +1,302 @@
+MODULE tetra_ip
+!this module provides rouitnes for the tetrahedra method
+
+  USE kinds, ONLY : dp
+
+contains
+  !---------------------------------------------------------------------------
+  subroutine tetrahedra1 (nk1, nk2, nk3, ntetra, tetra)
+    !-------------------------------------------------------------------------
+    !
+    ! Tetrahedron method according to P. E. Bloechl et al, PRB49, 16223 (1994)
+    !
+    !-------------------------------------------------------------------------
+    implicit none
+    
+    integer, intent(in):: nk1, nk2, nk3, ntetra
+    integer, intent(out) :: tetra(4,ntetra) 
+    integer :: nkr, i,j,k, n, ip1,jp1,kp1, &
+         n1,n2,n3,n4,n5,n6,n7,n8
+    integer, parameter :: stderr = 0
+    
+    ! Total number of k-points
+    nkr = nk1*nk2*nk3
+
+    !  Construct tetrahedra
+
+    do i=1,nk1
+       do j=1,nk2
+          do k=1,nk3
+             !  n1-n8 are the indices of k-point 1-8 forming a cube
+             ip1 = mod(i,nk1)+1
+             jp1 = mod(j,nk2)+1
+             kp1 = mod(k,nk3)+1
+             n1 = (  k-1) + (  j-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n2 = (  k-1) + (  j-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             n3 = (  k-1) + (jp1-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n4 = (  k-1) + (jp1-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             n5 = (kp1-1) + (  j-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n6 = (kp1-1) + (  j-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             n7 = (kp1-1) + (jp1-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n8 = (kp1-1) + (jp1-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             !  there are 6 tetrahedra per cube (and nk1*nk2*nk3 cubes)
+             n  = 6 * ( (k-1) + (j-1)*nk3 + (i-1)*nk3*nk2 )
+             
+             tetra (1,n+1) = n1
+             tetra (2,n+1) = n2
+             tetra (3,n+1) = n3
+             tetra (4,n+1) = n6
+             
+             tetra (1,n+2) = n2
+             tetra (2,n+2) = n3
+             tetra (3,n+2) = n4
+             tetra (4,n+2) = n6
+             
+             tetra (1,n+3) = n1
+             tetra (2,n+3) = n3
+             tetra (3,n+3) = n5
+             tetra (4,n+3) = n6
+             
+             tetra (1,n+4) = n3
+             tetra (2,n+4) = n4
+             tetra (3,n+4) = n6
+             tetra (4,n+4) = n8
+             
+             tetra (1,n+5) = n3
+             tetra (2,n+5) = n6
+             tetra (3,n+5) = n7
+             tetra (4,n+5) = n8
+             
+             tetra (1,n+6) = n3
+             tetra (2,n+6) = n5
+             tetra (3,n+6) = n6
+             tetra (4,n+6) = n7
+          enddo
+       enddo
+    enddo
+    
+    !  check
+    do n=1, ntetra
+       do i=1,4        
+          if ( tetra(i,n)<1 .or. tetra(i,n)> (nk1*nk2*nk3) ) then
+             write(stderr,*) 'Something wrong with the construction of tetrahedra' 
+             call exit(1)
+          endif
+       enddo
+    enddo
+  end subroutine tetrahedra1
+  
+  !-----------------------------------------------------------------------
+  subroutine hpsort1 (n, ra, ind)  
+    !---------------------------------------------------------------------
+    ! sort an array ra(1:n) into ascending order using heapsort algorithm.
+    ! n is input, ra is replaced on output by its sorted rearrangement.
+    ! create an index table (ind) by making an exchange in the index array
+    ! whenever an exchange is made on the sorted data array (ra).
+    ! in case of equal values in the data array (ra) the values in the
+    ! index array (ind) are used to order the entries.
+    ! if on input ind(1)  = 0 then indices are initialized in the routine,
+    ! if on input ind(1) != 0 then indices are assumed to have been
+    !                initialized before entering the routine and these
+    !                indices are carried around during the sorting process
+    !
+    ! no work space needed !
+    ! free us from machine-dependent sorting-routines !
+    !
+    ! adapted from Numerical Recipes pg. 329 (new edition)
+    !
+    !---------------------------------------------------------------------
+    implicit none  
+    !-input/output variables
+    integer :: n  
+    integer :: ind (*)  
+    real(kind=dp) :: ra (*)  
+    !-local variables
+    integer :: i, ir, j, l, iind  
+    real(kind=dp) :: rra  
+    ! initialize index array
+    if (ind (1) .eq.0) then  
+       do i = 1, n  
+          ind (i) = i  
+       enddo
+    endif
+    ! nothing to order
+    if (n.lt.2) return  
+    ! initialize indices for hiring and retirement-promotion phase
+    l = n / 2 + 1  
+    ir = n  
+10  continue  
+    ! still in hiring phase
+    if (l.gt.1) then  
+       l = l - 1  
+       rra = ra (l)  
+       iind = ind (l)  
+       ! in retirement-promotion phase.
+    else  
+       ! clear a space at the end of the array
+       rra = ra (ir)  
+       !
+       iind = ind (ir)  
+       ! retire the top of the heap into it
+       ra (ir) = ra (1)  
+       !
+       ind (ir) = ind (1)  
+       ! decrease the size of the corporation
+       ir = ir - 1  
+       ! done with the last promotion
+       if (ir.eq.1) then  
+          ! the least competent worker at all !
+          ra (1) = rra  
+          !
+          ind (1) = iind  
+          return  
+       endif
+    endif
+    ! wheter in hiring or promotion phase, we
+    i = l  
+    ! set up to place rra in its proper level
+    j = l + l  
+    !
+    do while (j.le.ir)  
+       if (j.lt.ir) then  
+          ! compare to better underling
+          if (ra (j) .lt.ra (j + 1) ) then  
+             j = j + 1  
+          elseif (ra (j) .eq.ra (j + 1) ) then  
+             if (ind (j) .lt.ind (j + 1) ) j = j + 1  
+          endif
+       endif
+       ! demote rra
+       if (rra.lt.ra (j) ) then  
+          ra (i) = ra (j)  
+          ind (i) = ind (j)  
+          i = j  
+          j = j + j  
+       elseif (rra.eq.ra (j) ) then  
+          ! demote rra
+          if (iind.lt.ind (j) ) then  
+             ra (i) = ra (j)  
+             ind (i) = ind (j)  
+             i = j  
+             j = j + j  
+          else  
+             ! set j to terminate do-while loop
+             j = ir + 1  
+          endif
+          ! this is the right place for rra
+       else  
+          ! set j to terminate do-while loop
+          j = ir + 1  
+       endif
+    enddo
+    ra (i) = rra  
+    ind (i) = iind  
+    goto 10  
+    !
+  end subroutine hpsort1
+  
+  !--------------------------------------------------------------------
+  subroutine weights_delta1 (energy, band,  ntetra, tetra, nkpoints,  weights)
+    !--------------------------------------------------------------------
+    !
+    ! ... calculates weights with the tetrahedron method (P.E.Bloechl)
+    !     assuming the integrand is convoluted with a delta function
+    !     centered about the band 
+    !
+    !--------------------------------------------------------------------
+    
+    implicit none
+    ! I/O variables
+    integer, intent(in) :: nkpoints, ntetra, tetra(4, ntetra)
+    real(kind=dp), intent(in) :: energy, band(nkpoints)
+    real(kind=dp), intent(out) :: weights(nkpoints)
+    ! local variables
+    real(kind=dp) :: e1, e2, e3, e4, fact, etetra (4)
+    integer :: ik, ibnd, nt, nk, ns, i, kp1, kp2, kp3, kp4, itetra (4)
+    
+    ! Initialize to zero the weights
+    weights = 0.0d0
+    
+    do nt = 1, ntetra      
+       !
+       ! etetra are the energies at the vertexes of the nt-th tetrahedron
+       !
+       do i = 1, 4
+          etetra (i) = band (tetra (i, nt))
+       enddo
+       itetra (1) = 0
+       call hpsort1 (4, etetra, itetra)
+       !
+       ! ...sort in ascending order: e1 < e2 < e3 < e4
+       !
+       e1 = etetra (1)
+       e2 = etetra (2)
+       e3 = etetra (3)
+       e4 = etetra (4)
+       !
+       ! kp1-kp4 are the irreducible k-points corresponding to e1-e4
+       !
+       kp1 = tetra (itetra (1), nt) 
+       kp2 = tetra (itetra (2), nt) 
+       kp3 = tetra (itetra (3), nt) 
+       kp4 = tetra (itetra (4), nt) 
+       !
+       ! calculate weights 
+       !
+       ! Remember that if energy.lt.e1 or energy.ge.e4 
+       ! there is no contribution in this case
+       if (energy.lt.e4.and.energy.ge.e3) then                                
+          fact = 1.0d0/ntetra *  (e4 - energy)**2 / (e4 - e1) / (e4 - e2) &
+               / (e4 - e3)
+          weights (kp1) = weights (kp1) + fact * (e4 - energy) / (e4 - e1)
+          weights (kp2) = weights (kp2) + fact * (e4 - energy) / (e4 - e2)
+          weights (kp3) = weights (kp3) + fact * (e4 - energy) / (e4 - e3)
+          weights (kp4) = weights (kp4) + fact * ( 3.0d0 - (e4 - energy) * &
+               (1.d0 / (e4 - e1) + 1.d0 / (e4 - e2) + 1.d0 / (e4 - e3) ) )
+       elseif (energy.lt.e3.and.energy.ge.e2) then
+          weights (kp1) = weights (kp1) + 1.0d0/ntetra * ( &
+               (energy - e2)**3 * (1.0d0/ (e3 - e1)**2 / (e3 - e2) / (e4 - e2) + &
+               1.0d0/ (e3 - e1) / (e4 - e1)**2 / (e4 - e2) + &
+               1.0d0/ (e3 - e1)**2 / (e4 - e1) / (e4 - e2))  &
+               -3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1)**2 / (e4 - e1) + &
+               1.0d0/ (e3 - e1) / (e4 - e1)**2)  &
+               -3.0d0 * (energy - e2) * ((e2 - e1)**2 - (e3 - e2)*(e4 - e2))  &
+               / (e3 - e1)**2 / (e4 - e1)**2         &                                            
+               +(e2 - e1) * ( (e3 - e2)/(e3 - e1) + (e4 - e2)/(e4 - e1)) & 
+               / (e3 - e1) / (e4 - e1))
+          weights (kp2) = weights (kp2) + 1.0d0/ntetra * ( &
+               (energy - e2)**3 * (1.0d0/ (e3 - e1) / (e4 - e1) / (e4 - e2)**2 + &
+               1.0d0/ (e3 - e1) / (e3 - e2) / (e4 - e2)**2 + &
+               1.0d0/ (e3 - e1) / (e3 - e2)**2 / (e4 - e2))  &
+               -3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1) / (e4 - e1) / (e4 - e2) + &
+               1.0d0/ (e3 - e1) / (e3 - e2) / (e4 - e2))  &
+               +3.0d0 * (energy - e2) * (1.0d0/ (e3 - e1) / (e4 - e1))  &
+               +(e2 - e1) / (e3 - e1) / (e4 - e1))
+          weights (kp3) = weights (kp3) + 1.0d0/ntetra * ( &
+               -(energy - e2)**3 * (1.0d0/ (e3 - e1)**2 / (e4 - e1) / (e4 - e2) + &
+               1.0d0/ (e3 - e1) / (e3 - e2)**2 / (e4 - e2) + &
+               1.0d0/ (e3 - e1)**2 / (e3 - e2) / (e4 - e2))  &
+               +3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1)**2 / (e4 - e1)) &
+               +3.0d0 * (energy - e2) * ((e2 - e1)/ (e3 - e1)**2 / (e4 - e1))  &
+               +(e2 - e1)**2 / (e3 - e1)**2 / (e4 - e1))
+          weights (kp4) = weights (kp4) + 1.0d0/ntetra * ( &
+               -(energy - e2)**3 * (1.0d0/ (e3 - e1) / (e3 - e2) / (e4 - e1)**2 + &
+               1.0d0/ (e3 - e2) / (e4 - e1) / (e4 - e2)**2 + &
+               1.0d0/ (e3 - e2) / (e4 - e1)**2 / (e4 - e2))  &
+               +3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1) / (e4 - e1)**2) &
+               +3.0d0 * (energy - e2) * ((e2 - e1)/ (e3 - e1) / (e4 - e1)**2)  &
+               +(e2 - e1)**2 / (e3 - e1) / (e4 - e1)**2)
+       elseif (energy.lt.e2.and.energy.ge.e1) then
+          fact = 1.0d0 / ntetra * (energy - e1) **2 / (e2 - e1) / (e3 - e1) &
+               / (e4 - e1)
+          weights (kp1) = weights (kp1) + fact * (3.d0 - (energy - e1) * &
+               (1.d0 / (e2 - e1) + 1.d0 / (e3 - e1) + 1.d0 / (e4 - e1) ) )
+          weights (kp2) = weights (kp2) + fact * (energy - e1) / (e2 - e1) 
+          weights (kp3) = weights (kp3) + fact * (energy - e1) / (e3 - e1) 
+          weights (kp4) = weights (kp4) + fact * (energy - e1) / (e4 - e1)
+       endif
+    enddo
+  end subroutine weights_delta1
+
+END MODULE tetra_ip
+
diff --git a/GWW/simple_ip/tetra_mod2.f90 b/GWW/simple_ip/tetra_mod2.f90
new file mode 100644
index 000000000..1f326e7fb
--- /dev/null
+++ b/GWW/simple_ip/tetra_mod2.f90
@@ -0,0 +1,302 @@
+MODULE tetra_ip
+!this module provides rouitnes for the tetrahedra method
+
+  USE kinds, ONLY : dp
+
+contains
+  !---------------------------------------------------------------------------
+  subroutine tetrahedra1 (nk1, nk2, nk3, ntetra, tetra)
+    !-------------------------------------------------------------------------
+    !
+    ! Tetrahedron method according to P. E. Bloechl et al, PRB49, 16223 (1994)
+    !
+    !-------------------------------------------------------------------------
+    implicit none
+    
+    integer, intent(in):: nk1, nk2, nk3, ntetra
+    integer, intent(out) :: tetra(4,ntetra) 
+    integer :: nkr, i,j,k, n, ip1,jp1,kp1, &
+         n1,n2,n3,n4,n5,n6,n7,n8
+    integer, parameter :: stderr = 0
+    
+    ! Total number of k-points
+    nkr = nk1*nk2*nk3
+
+    !  Construct tetrahedra
+
+    do i=1,nk1
+       do j=1,nk2
+          do k=1,nk3
+             !  n1-n8 are the indices of k-point 1-8 forming a cube
+             ip1 = mod(i,nk1)+1
+             jp1 = mod(j,nk2)+1
+             kp1 = mod(k,nk3)+1
+             n1 = (  k-1) + (  j-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n2 = (  k-1) + (  j-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             n3 = (  k-1) + (jp1-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n4 = (  k-1) + (jp1-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             n5 = (kp1-1) + (  j-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n6 = (kp1-1) + (  j-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             n7 = (kp1-1) + (jp1-1)*nk3 + (  i-1)*nk2*nk3 + 1
+             n8 = (kp1-1) + (jp1-1)*nk3 + (ip1-1)*nk2*nk3 + 1
+             !  there are 6 tetrahedra per cube (and nk1*nk2*nk3 cubes)
+             n  = 6 * ( (k-1) + (j-1)*nk3 + (i-1)*nk3*nk2 )
+             
+             tetra (1,n+1) = n1
+             tetra (2,n+1) = n2
+             tetra (3,n+1) = n3
+             tetra (4,n+1) = n6
+             
+             tetra (1,n+2) = n2
+             tetra (2,n+2) = n3
+             tetra (3,n+2) = n4
+             tetra (4,n+2) = n6
+             
+             tetra (1,n+3) = n1
+             tetra (2,n+3) = n3
+             tetra (3,n+3) = n5
+             tetra (4,n+3) = n6
+             
+             tetra (1,n+4) = n3
+             tetra (2,n+4) = n4
+             tetra (3,n+4) = n6
+             tetra (4,n+4) = n8
+             
+             tetra (1,n+5) = n3
+             tetra (2,n+5) = n6
+             tetra (3,n+5) = n7
+             tetra (4,n+5) = n8
+             
+             tetra (1,n+6) = n3
+             tetra (2,n+6) = n5
+             tetra (3,n+6) = n6
+             tetra (4,n+6) = n7
+          enddo
+       enddo
+    enddo
+    
+    !  check
+    do n=1, ntetra
+       do i=1,4        
+          if ( tetra(i,n)<1 .or. tetra(i,n)> (nk1*nk2*nk3) ) then
+             write(stderr,*) 'Something wrong with the construction of tetrahedra' 
+             call exit(1)
+          endif
+       enddo
+    enddo
+  end subroutine tetrahedra1
+  
+  !-----------------------------------------------------------------------
+  subroutine hpsort1 (n, ra, ind)  
+    !---------------------------------------------------------------------
+    ! sort an array ra(1:n) into ascending order using heapsort algorithm.
+    ! n is input, ra is replaced on output by its sorted rearrangement.
+    ! create an index table (ind) by making an exchange in the index array
+    ! whenever an exchange is made on the sorted data array (ra).
+    ! in case of equal values in the data array (ra) the values in the
+    ! index array (ind) are used to order the entries.
+    ! if on input ind(1)  = 0 then indices are initialized in the routine,
+    ! if on input ind(1) != 0 then indices are assumed to have been
+    !                initialized before entering the routine and these
+    !                indices are carried around during the sorting process
+    !
+    ! no work space needed !
+    ! free us from machine-dependent sorting-routines !
+    !
+    ! adapted from Numerical Recipes pg. 329 (new edition)
+    !
+    !---------------------------------------------------------------------
+    implicit none  
+    !-input/output variables
+    integer :: n  
+    integer :: ind (*)  
+    real(kind=dp) :: ra (*)  
+    !-local variables
+    integer :: i, ir, j, l, iind  
+    real(kind=dp) :: rra  
+    ! initialize index array
+    if (ind (1) .eq.0) then  
+       do i = 1, n  
+          ind (i) = i  
+       enddo
+    endif
+    ! nothing to order
+    if (n.lt.2) return  
+    ! initialize indices for hiring and retirement-promotion phase
+    l = n / 2 + 1  
+    ir = n  
+10  continue  
+    ! still in hiring phase
+    if (l.gt.1) then  
+       l = l - 1  
+       rra = ra (l)  
+       iind = ind (l)  
+       ! in retirement-promotion phase.
+    else  
+       ! clear a space at the end of the array
+       rra = ra (ir)  
+       !
+       iind = ind (ir)  
+       ! retire the top of the heap into it
+       ra (ir) = ra (1)  
+       !
+       ind (ir) = ind (1)  
+       ! decrease the size of the corporation
+       ir = ir - 1  
+       ! done with the last promotion
+       if (ir.eq.1) then  
+          ! the least competent worker at all !
+          ra (1) = rra  
+          !
+          ind (1) = iind  
+          return  
+       endif
+    endif
+    ! wheter in hiring or promotion phase, we
+    i = l  
+    ! set up to place rra in its proper level
+    j = l + l  
+    !
+    do while (j.le.ir)  
+       if (j.lt.ir) then  
+          ! compare to better underling
+          if (ra (j) .lt.ra (j + 1) ) then  
+             j = j + 1  
+          elseif (ra (j) .eq.ra (j + 1) ) then  
+             if (ind (j) .lt.ind (j + 1) ) j = j + 1  
+          endif
+       endif
+       ! demote rra
+       if (rra.lt.ra (j) ) then  
+          ra (i) = ra (j)  
+          ind (i) = ind (j)  
+          i = j  
+          j = j + j  
+       elseif (rra.eq.ra (j) ) then  
+          ! demote rra
+          if (iind.lt.ind (j) ) then  
+             ra (i) = ra (j)  
+             ind (i) = ind (j)  
+             i = j  
+             j = j + j  
+          else  
+             ! set j to terminate do-while loop
+             j = ir + 1  
+          endif
+          ! this is the right place for rra
+       else  
+          ! set j to terminate do-while loop
+          j = ir + 1  
+       endif
+    enddo
+    ra (i) = rra  
+    ind (i) = iind  
+    goto 10  
+    !
+  end subroutine hpsort1
+  
+  !--------------------------------------------------------------------
+  subroutine weights_delta1 (energy, band,  ntetra, tetra, nkpoints,  weights)
+    !--------------------------------------------------------------------
+    !
+    ! ... calculates weights with the tetrahedron method (P.E.Bloechl)
+    !     assuming the integrand is convoluted with a delta function
+    !     centered about the band 
+    !
+    !--------------------------------------------------------------------
+    
+    implicit none
+    ! I/O variables
+    integer, intent(in) :: nkpoints, ntetra, tetra(4, ntetra)
+    real(kind=dp), intent(in) :: energy, band(nkpoints)
+    real(kind=dp), intent(out) :: weights(nkpoints)
+    ! local variables
+    real(kind=dp) :: e1, e2, e3, e4, fact, etetra (4)
+    integer :: ik, ibnd, nt, nk, ns, i, kp1, kp2, kp3, kp4, itetra (4)
+    
+    ! Initialize to zero the weights
+    weights = 0.0d0
+    
+    do nt = 1, ntetra      
+       !
+       ! etetra are the energies at the vertexes of the nt-th tetrahedron
+       !
+       do i = 1, 4
+          etetra (i) = band (tetra (i, nt))
+       enddo
+       itetra (1) = 0
+       call hpsort1 (4, etetra, itetra)
+       !
+       ! ...sort in ascending order: e1 < e2 < e3 < e4
+       !
+       e1 = etetra (1)
+       e2 = etetra (2)
+       e3 = etetra (3)
+       e4 = etetra (4)
+       !
+       ! kp1-kp4 are the irreducible k-points corresponding to e1-e4
+       !
+       kp1 = tetra (itetra (1), nt) 
+       kp2 = tetra (itetra (2), nt) 
+       kp3 = tetra (itetra (3), nt) 
+       kp4 = tetra (itetra (4), nt) 
+       !
+       ! calculate weights 
+       !
+       ! Remember that if energy.lt.e1 or energy.ge.e4 
+       ! there is no contribution in this case
+       if (energy.lt.e4.and.energy.ge.e3) then                                
+          fact = 1.0d0/ntetra *  (e4 - energy)**2 / (e4 - e1) / (e4 - e2) &
+               / (e4 - e3)
+          weights (kp1) = weights (kp1) + fact * (e4 - energy) / (e4 - e1)
+          weights (kp2) = weights (kp2) + fact * (e4 - energy) / (e4 - e2)
+          weights (kp3) = weights (kp3) + fact * (e4 - energy) / (e4 - e3)
+          weights (kp4) = weights (kp4) + fact * ( 3.0d0 - (e4 - energy) * &
+               (1.d0 / (e4 - e1) + 1.d0 / (e4 - e2) + 1.d0 / (e4 - e3) ) )
+       elseif (energy.lt.e3.and.energy.ge.e2) then
+          weights (kp1) = weights (kp1) + 1.0d0/ntetra * ( &
+               (energy - e2)**3 * (1.0d0/ (e3 - e1)**2 / (e3 - e2) / (e4 - e2) + &
+               1.0d0/ (e3 - e1) / (e4 - e1)**2 / (e4 - e2) + &
+               1.0d0/ (e3 - e1)**2 / (e4 - e1) / (e4 - e2))  &
+               -3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1)**2 / (e4 - e1) + &
+               1.0d0/ (e3 - e1) / (e4 - e1)**2)  &
+               -3.0d0 * (energy - e2) * ((e2 - e1)**2 - (e3 - e2)*(e4 - e2))  &
+               / (e3 - e1)**2 / (e4 - e1)**2         &                                            
+               +(e2 - e1) * ( (e3 - e2)/(e3 - e1) + (e4 - e2)/(e4 - e1)) & 
+               / (e3 - e1) / (e4 - e1))
+          weights (kp2) = weights (kp2) + 1.0d0/ntetra * ( &
+               (energy - e2)**3 * (1.0d0/ (e3 - e1) / (e4 - e1) / (e4 - e2)**2 + &
+               1.0d0/ (e3 - e1) / (e3 - e2) / (e4 - e2)**2 + &
+               1.0d0/ (e3 - e1) / (e3 - e2)**2 / (e4 - e2))  &
+               -3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1) / (e4 - e1) / (e4 - e2) + &
+               1.0d0/ (e3 - e1) / (e3 - e2) / (e4 - e2))  &
+               +3.0d0 * (energy - e2) * (1.0d0/ (e3 - e1) / (e4 - e1))  &
+               +(e2 - e1) / (e3 - e1) / (e4 - e1))
+          weights (kp3) = weights (kp3) + 1.0d0/ntetra * ( &
+               -(energy - e2)**3 * (1.0d0/ (e3 - e1)**2 / (e4 - e1) / (e4 - e2) + &
+               1.0d0/ (e3 - e1) / (e3 - e2)**2 / (e4 - e2) + &
+               1.0d0/ (e3 - e1)**2 / (e3 - e2) / (e4 - e2))  &
+               +3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1)**2 / (e4 - e1)) &
+               +3.0d0 * (energy - e2) * ((e2 - e1)/ (e3 - e1)**2 / (e4 - e1))  &
+               +(e2 - e1)**2 / (e3 - e1)**2 / (e4 - e1))
+          weights (kp4) = weights (kp4) + 1.0d0/ntetra * ( &
+               -(energy - e2)**3 * (1.0d0/ (e3 - e1) / (e3 - e2) / (e4 - e1)**2 + &
+               1.0d0/ (e3 - e2) / (e4 - e1) / (e4 - e2)**2 + &
+               1.0d0/ (e3 - e2) / (e4 - e1)**2 / (e4 - e2))  &
+               +3.0d0 * (energy - e2)**2 * (1.0d0/ (e3 - e1) / (e4 - e1)**2) &
+               +3.0d0 * (energy - e2) * ((e2 - e1)/ (e3 - e1) / (e4 - e1)**2)  &
+               +(e2 - e1)**2 / (e3 - e1) / (e4 - e1)**2)
+       elseif (energy.lt.e2.and.energy.ge.e1) then
+          fact = 1.0d0 / ntetra * (energy - e1) **2 / (e2 - e1) / (e3 - e1) &
+               / (e4 - e1)
+          weights (kp1) = weights (kp1) + fact * (3.d0 - (energy - e1) * &
+               (1.d0 / (e2 - e1) + 1.d0 / (e3 - e1) + 1.d0 / (e4 - e1) ) )
+          weights (kp2) = weights (kp2) + fact * (energy - e1) / (e2 - e1) 
+          weights (kp3) = weights (kp3) + fact * (energy - e1) / (e3 - e1) 
+          weights (kp4) = weights (kp4) + fact * (energy - e1) / (e4 - e1)
+       endif
+    enddo
+  end subroutine weights_delta1
+
+END MODULE tetra_ip
+