Merge AFQMC

Disable AFQMC build by default (BUILD_AFQMC=0).


git-svn-id: https://subversion.assembla.com/svn/qmcdev/trunk@7379 e5b18d87-469d-4833-9cc0-8cdfa06e9491
This commit is contained in:
Mark Dewing 2016-12-24 06:37:39 +00:00
commit e79b3e4844
90 changed files with 114893 additions and 12 deletions

View File

@ -185,6 +185,8 @@ IF(MIXED_PRECISION AND BUILD_LMYENGINE_INTERFACE)
MESSAGE(STATUS "LMY engine is not compatiable with CPU mixed precision build! Disabling LMY engine")
SET(BUILD_LMYENGINE_INTERFACE 0)
ENDIF()
SET(BUILD_AFQMC 0 CACHE BOOL "Build with AFQMC")
SET(BUILD_FCIQMC 0 CACHE BOOL "Build with FCIQMC")
#SET(BUILD_QMCTOOLS 1 CACHE BOOL "Build tools for QMCPACK")
#SET(BUILD_SANDBOX 0 CACHE BOOL "Build sandbox for testing")
#SET(MPIP_PROFILE 0 CACHE BOOL "Build with mpip for mpi profile")
@ -460,6 +462,7 @@ ELSE(CMAKE_TOOLCHAIN_FILE)
IF("${LAPACK_LIBRARIES}" MATCHES "mkl")
MESSAGE(STATUS "MKL found via LAPACK/BLAS")
SET( MKL_FOUND 1 )
SET( HAVE_MKL 1 )
ENDIF()
ENDIF()
IF(LAPACK_FOUND)
@ -476,27 +479,58 @@ ENDIF(CMAKE_TOOLCHAIN_FILE)
# check if C++11 is needed by QMCPACK features
SET(CXX11_NEEDED FALSE)
IF(BUILD_LMYENGINE_INTERFACE)
IF(BUILD_LMYENGINE_INTERFACE OR BUILD_AFQMC)
SET(CXX11_NEEDED TRUE)
ENDIF()
## add AFQMC checker here
# once C++11 is needed
IF(CXX11_NEEDED)
SET(CXX11_FLAG "-std=c++11")
#check if the CXX compiler supports -std=c++11 option
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG(-std=c++11 CXX_COMPILER_SUPPORT_CXX11)
CHECK_CXX_COMPILER_FLAG(${CXX11_FLAG} CXX_COMPILER_SUPPORT_CXX11)
# Turn on C++11 for this code
# Force the flag on Cray with Intel compiler, because the Cray wrapper
# prints a warning that interferes with the flag detection code
# with older versions of CMake.
IF($ENV{CRAYPE_VERSION} MATCHES ".")
IF( ${COMPILER} MATCHES "Intel" AND NOT CXX_COMPILER_SUPPORT_CXX11)
SET(CXX_COMPILER_SUPPORT_CXX11 TRUE)
MESSAGE(STATUS "Forcing C++11 support on Cray with Intel")
ENDIF()
ENDIF()
IF (CXX_COMPILER_SUPPORT_CXX11)
# avoid repeated -std=c++11 flag
IF(NOT CMAKE_CXX_FLAGS MATCHES "-std=c\\+\\+11")
SET (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
STRING(REPLACE "++" "\\+\\+" CXX11_FLAG_MATCH ${CXX11_FLAG})
IF(NOT CMAKE_CXX_FLAGS MATCHES ${CXX11_FLAG_MATCH})
SET (CMAKE_CXX_FLAGS "${CXX11_FLAG} ${CMAKE_CXX_FLAGS}")
ENDIF()
ELSE()
MESSAGE(STATUS "Disabling features requring C++11 due to the lack of compiler support")
SET(BUILD_LMYENGINE_INTERFACE 0)
## disable AFQMC here
SET(BUILD_AFQMC 0)
ENDIF()
ENDIF(CXX11_NEEDED)
# ---- AFQMC prerequisites ----------------------------------------------
# Without MKL the AFQMC build is switched off (status message only).
IF(BUILD_AFQMC AND NOT MKL_FOUND)
  MESSAGE(STATUS "AFQMC requires MKL sparse libraries. Disabling AFQMC")
  SET(BUILD_AFQMC 0)
ENDIF()
# The remaining checks only apply while AFQMC is still enabled.
IF(BUILD_AFQMC)
  # MPI is mandatory: configuration stops here if it is missing.
  IF(NOT QMC_MPI)
    MESSAGE(FATAL_ERROR "AFQMC requires building with MPI (QMC_MPI=1)")
  ENDIF()
  # Complex builds are allowed to continue, with a notice.
  IF(QMC_COMPLEX)
    MESSAGE(STATUS "Warning: Complex wavefunctions are not yet supported by AFQMC.")
  ENDIF()
  # With the GNU C++ compiler and MKL, librt is added to the link line.
  IF(CMAKE_COMPILER_IS_GNUCXX AND MKL_FOUND)
    LINK_LIBRARIES("rt")
  ENDIF()
ENDIF()
# setup ctest variables
IF ( HAVE_MPI )

27
config/Fedora.cmake Normal file
View File

@ -0,0 +1,27 @@
#--------------------------------------------------------------------------
# Toolchain settings for Fedora: MPI compiler wrappers, compile flags and
# the libraries to link. No MKL settings are made in this file; the
# libraries named "lapack" and "blas" are linked instead.
#--------------------------------------------------------------------------
set(CMAKE_C_COMPILER mpicc)
set(CMAKE_CXX_COMPILER mpic++ )

# Flags shared by the C and C++ compilers.
set(GNU_OPTS "-fopenmp -Wall -O3 -march=native -DADD_ -DINLINE_ALL=inline -DMAX_CHAR_PER_WALKER=98 -D_TIMER_ -D_LINUX_ -DUSE_MPI ")
set(CMAKE_C_FLAGS "${GNU_OPTS} -std=c99")
set(CMAKE_CXX_FLAGS "${GNU_OPTS} -std=c++11 -Wno-sign-compare -Drestrict=__restrict__")

# Parallel build switches. HAVE_MPI was previously set twice in this file;
# it is now set once, next to QMC_MPI.
set(ENABLE_OPENMP 1)
set(QMC_MPI 1)
set(HAVE_MPI 1)

# SIMD and prefetch feature switches.
set(HAVE_SSE 1)
set(HAVE_SSE2 1)
set(HAVE_SSE3 1)
set(HAVE_SSSE3 1)
set(HAVE_SSE41 1)
set(USE_PREFETCH 1)
set(PREFETCH_AHEAD 10)

# Libraries added to every target's link line.
link_libraries(rt)
link_libraries(lapack)
link_libraries(blas)

set(TEST_MAX_PROCS 8)

43
config/LLNLGNU.cmake Normal file
View File

@ -0,0 +1,43 @@
#--------------------------------------------------------------------------
# Compilers (MPI wrappers) and compile flags
#--------------------------------------------------------------------------
set(CMAKE_C_COMPILER mpicc)
set(CMAKE_CXX_COMPILER mpicxx)

# Preprocessor definitions used for the C++ flags only.
set(GNU_OPTS "-DADD_ -DINLINE_ALL=inline -D_TIMER_ -DUSE_MPI -D_LINUX_")
# Optimisation flags shared by C and C++. The variable keeps the name
# INTEL_OPTS although the options are GCC-style (-f..., -m...).
set(INTEL_OPTS "-g -malign-double -fomit-frame-pointer -ffast-math -fopenmp -O3 -msse4 -msse4.1 -Drestrict=__restrict__ -finline-limit=1000 -fstrict-aliasing -funroll-all-loops -Wno-deprecated")
set(CMAKE_C_FLAGS "$ENV{CC_FLAGS} ${INTEL_OPTS} -std=c99 ")
set(CMAKE_CXX_FLAGS "$ENV{CXX_FLAGS} ${GNU_OPTS} ${INTEL_OPTS} -std=c++11 ")

#--------------------------------------------------------------------------
# Feature switches (OpenMP, MPI, SIMD, prefetch, MKL)
#--------------------------------------------------------------------------
set(ENABLE_OPENMP 1)
set(HAVE_MPI 1)
foreach(simd_level SSE SSE2 SSE3 SSSE3 SSE41)
  set(HAVE_${simd_level} 1)
endforeach()
set(USE_PREFETCH 1)
set(PREFETCH_AHEAD 10)
set(HAVE_MKL 1)
set(HAVE_MKL_VML 1)

#--------------------------------------------------------------------------
# Library search locations
#--------------------------------------------------------------------------
set(ENV{BOOST_ROOT} /usr/gapps/qmc/libs/INTEL/boost_1_62_0)
set(CMAKE_FIND_ROOT_PATH /usr/lib64/)

# MKL 11.3.2 (see the paths below): headers plus the sequential LP64 link line.
include_directories(/usr/local/tools/mkl-11.3.2/include)
set(LAPACK_LIBRARY
  -L/usr/local/tools/mkl-11.3.2/mkl/lib/intel64
  -Wl,--no-as-needed
  -lmkl_intel_lp64
  -lmkl_sequential
  -lmkl_core
  -lpthread
  -lm
  -ldl
  -lrt
  -Wl,-rpath=/usr/local/tools/mkl-11.3.2/mkl/lib/intel64
)

# Clear the rule variables CMake uses to link shared and module libraries.
unset(CMAKE_CXX_LINK_SHARED_LIBRARY)
unset(CMAKE_CXX_LINK_MODULE_LIBRARY)
unset(CMAKE_C_LINK_SHARED_LIBRARY)
unset(CMAKE_C_LINK_MODULE_LIBRARY)

View File

@ -0,0 +1,43 @@
#--------------------------------------------------------------------------
# Compilers (MPI wrappers) and compile flags
#--------------------------------------------------------------------------
set(CMAKE_C_COMPILER mpicc)
set(CMAKE_CXX_COMPILER mpicxx)

# Preprocessor definitions used for the C++ flags only.
set(GNU_OPTS "-DADD_ -DINLINE_ALL=inline -D_TIMER_ -DUSE_MPI -D_LINUX_")
# Optimisation flags shared by C and C++. The variable keeps the name
# INTEL_OPTS although the options are GCC-style (-f..., -m...).
set(INTEL_OPTS "-g -malign-double -fomit-frame-pointer -ffast-math -fopenmp -O3 -msse4 -Drestrict=__restrict__ -finline-limit=1000 -fstrict-aliasing -funroll-all-loops -Wno-deprecated")
set(CMAKE_C_FLAGS "$ENV{CC_FLAGS} ${INTEL_OPTS} -std=c99 ")
set(CMAKE_CXX_FLAGS "$ENV{CXX_FLAGS} ${GNU_OPTS} ${INTEL_OPTS} -std=c++11 ")

#--------------------------------------------------------------------------
# Feature switches (OpenMP, MPI, SIMD, prefetch, MKL)
#--------------------------------------------------------------------------
set(ENABLE_OPENMP 1)
set(HAVE_MPI 1)
foreach(simd_level SSE SSE2 SSE3 SSSE3 SSE41)
  set(HAVE_${simd_level} 1)
endforeach()
set(USE_PREFETCH 1)
set(PREFETCH_AHEAD 10)
set(HAVE_MKL 1)
set(HAVE_MKL_VML 1)

#--------------------------------------------------------------------------
# Library search locations (FFTW, Boost, system libraries)
#--------------------------------------------------------------------------
set(CMAKE_FIND_ROOT_PATH
  /usr/tce/packages/fftw/fftw-3.3.4-mvapich2-2.2-gcc-4.8-redhat/lib/
  /usr/gapps/qmc/libs/INTEL/boost_1_62_0
  /usr/lib64/
)

# MKL 11.3.3 (see the paths below): headers plus the sequential LP64 link line.
include_directories(/usr/tce/packages/mkl/mkl-11.3.3/include)
set(LAPACK_LIBRARY
  -L/usr/tce/packages/mkl/mkl-11.3.3/mkl/lib/intel64
  -Wl,--no-as-needed
  -lmkl_intel_lp64
  -lmkl_sequential
  -lmkl_core
  -lpthread
  -lm
  -ldl
  -lrt
  -Wl,-rpath=/usr/tce/packages/mkl/mkl-11.3.3/mkl/lib/intel64
)

# Clear the rule variables CMake uses to link shared and module libraries.
unset(CMAKE_CXX_LINK_SHARED_LIBRARY)
unset(CMAKE_CXX_LINK_MODULE_LIBRARY)
unset(CMAKE_C_LINK_SHARED_LIBRARY)
unset(CMAKE_C_LINK_MODULE_LIBRARY)

View File

@ -0,0 +1,43 @@
#--------------------------------------------------------------------------
# Compilers (MPI wrappers) and compile flags
#--------------------------------------------------------------------------
set(CMAKE_C_COMPILER mpicc)
set(CMAKE_CXX_COMPILER mpicxx)

# Preprocessor definitions used for the C++ flags only.
set(GNU_OPTS "-DADD_ -DINLINE_ALL=inline -D_TIMER_ -DUSE_MPI -D_LINUX_")
# Optimisation flags shared by C and C++.
set(INTEL_OPTS "-g -unroll -O3 -ip -qopenmp -qopt-prefetch -ftz -xHost -DUSE_REAL_STRUCT_FACTOR")
set(CMAKE_C_FLAGS "$ENV{CC_FLAGS} ${INTEL_OPTS} -std=c99 -restrict -Wno-deprecated")
# An extra "-cxx=icpc" option was left disabled at the end of the C++ flags.
set(CMAKE_CXX_FLAGS "$ENV{CXX_FLAGS} ${GNU_OPTS} ${INTEL_OPTS} -restrict -Wno-deprecated -std=c++11 ")

#--------------------------------------------------------------------------
# Feature switches (OpenMP, MPI, SIMD, prefetch, MKL)
#--------------------------------------------------------------------------
set(ENABLE_OPENMP 1)
set(HAVE_MPI 1)
foreach(simd_level SSE SSE2 SSE3 SSSE3 SSE41)
  set(HAVE_${simd_level} 1)
endforeach()
set(USE_PREFETCH 1)
set(PREFETCH_AHEAD 10)
set(HAVE_MKL 1)
set(HAVE_MKL_VML 1)

#--------------------------------------------------------------------------
# Library search locations (FFTW, Boost, system libraries)
#--------------------------------------------------------------------------
# NOTE(review): this file points at boost_1_40_0 while the other LLNL
# toolchain files use boost_1_62_0 - confirm this is intentional.
set(CMAKE_FIND_ROOT_PATH
  /usr/tce/packages/fftw/fftw-3.3.4-mvapich2-2.2-intel-16.0.3/lib/
  /usr/gapps/qmc/libs/INTEL/boost_1_40_0
  /usr/lib64/
)

# MKL 11.3.3 (see the paths below), linked through the compiler's -mkl=sequential switch.
include_directories(/usr/tce/packages/mkl/mkl-11.3.3/include)
set(LAPACK_LIBRARY
  -L/usr/tce/packages/mkl/mkl-11.3.3/mkl/lib/intel64
  -mkl=sequential
  -lrt
  -Wl,-rpath=/usr/tce/packages/mkl/mkl-11.3.3/mkl/lib/intel64
)

# Clear the rule variables CMake uses to link shared and module libraries.
unset(CMAKE_CXX_LINK_SHARED_LIBRARY)
unset(CMAKE_CXX_LINK_MODULE_LIBRARY)
unset(CMAKE_C_LINK_SHARED_LIBRARY)
unset(CMAKE_C_LINK_MODULE_LIBRARY)

View File

@ -2,3 +2,6 @@
SUBDIRS(molecules/H2O)
SUBDIRS(molecules/He)
# The N2 AFQMC example is only registered when AFQMC is built and the
# build is not a complex-valued one.
if(BUILD_AFQMC AND NOT QMC_COMPLEX)
  subdirs(afqmc/n2_vdz)
endif()

View File

@ -0,0 +1,13 @@
# Registers the short AFQMC N2 (vdz) example as a run-and-check test.
INCLUDE( "${qmcpack_SOURCE_DIR}/CMake/macros.cmake" )

# Scalar to check: name followed by "<reference value> <tolerance>"
# (presumably - confirm against QMC_RUN_AND_CHECK in macros.cmake).
LIST(APPEND AFQMC_N2_SCALARS "Ebound" "-109.26 0.02")

# The example directory is located relative to qmcpack_SOURCE_DIR, the same
# root used for macros.cmake above, instead of CMAKE_SOURCE_DIR, which
# points elsewhere when QMCPACK is not the top-level project.
QMC_RUN_AND_CHECK(short-afqmc-N2_vdz
  "${qmcpack_SOURCE_DIR}/examples/afqmc/n2_vdz"
  N2
  n2.xml
  4 1
  AFQMC_N2_SCALARS
  0 # presumably the output series to check; note this run is AFQMC, not VMC
  TRUE)

83031
examples/afqmc/n2_vdz/FCIDUMP Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,52 @@
<?xml version="1.0"?>
<simulation method="afqmc">
<project id="N2" series="0"/>
<AFQMCInfo name="info0">
<parameter name="NMO">28</parameter>
<parameter name="NAEA">7</parameter>
<parameter name="NAEB">7</parameter>
<parameter name="NETOT">14</parameter>
<parameter name="NCA">0</parameter>
<parameter name="NCB">0</parameter>
</AFQMCInfo>
<Hamiltonian name="ham0" type="SparseGeneral" info="info0">
<parameter name="filetype">fcidump</parameter>
<parameter name="filename">FCIDUMP</parameter>
<parameter name="cutoff_1bar">1e-6</parameter>
<parameter name="cutoff_2bar">1e-6</parameter>
<parameter name="cutoff_decomposition">1e-5</parameter>
<parameter name="hdf_write_file">ham.h5</parameter>
</Hamiltonian>
<Wavefunction name="wfn0" info="info0">
<ImpSamp name="impsamp0" type="PureSD" init="ground" >
<parameter name="filetype">none</parameter>
<parameter name="cutoff">1e-6</parameter>
<parameter name="hdf_write_file">wfn.h5</parameter>
</ImpSamp>
</Wavefunction>
<WalkerSet name="wset0" type="distributed">
<parameter name="min_weight">0.05</parameter>
<parameter name="max_weight">4</parameter>
<parameter name="reset_weight">1</parameter>
<parameter name="extra_spaces">10</parameter>
</WalkerSet>
<Propagator name="prop0" phaseless="yes" localenergy="yes" drift="yes" info="info0">
<parameter name="cutoff_propg">1e-6</parameter>
<parameter name="hdf_write_file">prop.h5</parameter>
<parameter name="parallel_factorization">yes</parameter>
</Propagator>
<execute wset="wset0" ham="ham0" wfn="wfn0" prop="prop0" info="info0">
<parameter name="timestep">0.005</parameter>
<parameter name="blocks">100</parameter>
<parameter name="steps">4</parameter>
<parameter name="substeps">4</parameter>
<parameter name="nWalkers">100</parameter>
</execute>
</simulation>

View File

@ -6,6 +6,8 @@ QMC_MPI = @QMC_MPI@
QMC_OMP = @QMC_OMP@
QMC_CUDA = @QMC_CUDA@
QMC_COMPLEX = @QMC_COMPLEX@
BUILD_AFQMC = @BUILD_AFQMC@
BUILD_FCIQMC = @BUILD_FCIQMC@
#system variables
QMC_HOSTNAME = @QMC_HOSTNAME@

379
src/AFQMC/AFQMCFactory.cpp Executable file
View File

@ -0,0 +1,379 @@
// -*- C++ -*-
/**@file AFQMCFactory.cpp
* @brief Top level class for AFQMC. Parses input and performs setup of classes.
*/
//#ifdef AFQMC
#if 1>0
#include<iostream>
#include<fstream>
#include<string>
#include<vector>
#include<map>
#include<complex>
#include<tuple>
#include <queue>
#include<algorithm>
#include<Message/MPIObjectBase.h>
#include "Message/OpenMP.h"
#include "Message/Communicate.h"
#include "Message/CommOperators.h"
#include "OhmmsData/libxmldefs.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "Configuration.h"
#include "OhmmsApp/RandomNumberControl.h"
#include <qmc_common.h>
#include"AFQMC/AFQMCFactory.h"
#include "config.h"
#include<AFQMC/Walkers/WalkerHandlerBase.h>
#include<AFQMC/Walkers/DistWalkerHandler.h>
#include<AFQMC/Walkers/LocalWalkerHandler.h>
#include<AFQMC/Hamiltonians/HamiltonianBase.h>
#include<AFQMC/Estimators/EstimatorHandler.h>
#include<AFQMC/Propagators/PropagatorBase.h>
#include<AFQMC/Wavefunctions/WavefunctionHandler.h>
#include<AFQMC/Propagators/phaseless_ImpSamp_ForceBias.h>
#include<AFQMC/Propagators/VMCPropagator.h>
#include<AFQMC/Hamiltonians/SparseGeneralHamiltonian.h>
#include"AFQMC/Wavefunctions/PureSingleDeterminant.h"
#include"AFQMC/Drivers/Driver.h"
#include"AFQMC/Drivers/selectedCI.h"
#include"AFQMC/Drivers/AFQMCDriver.h"
//#include"AFQMC/Drivers/AFQMCDistDriver.h"
#include"AFQMC/Drivers/VMCDriver.h"
#include"AFQMC/Utilities/myTimer.h"
// place timer here
myTimer Timer;
namespace qmcplusplus
{
/// Builds the factory on communicator c, keeping a reference to the random number controller m.
/// Initializers are listed in member declaration order (m_series, project_title, myRandomControl).
AFQMCFactory::AFQMCFactory(Communicate* c, RandomNumberControl &m)
  : MPIObjectBase(c),
    m_series(0),
    project_title("afqmc"),
    myRandomControl(m)
{
  // The result of head_nodes() is cached together with the communicator it is
  // given (presumably filled in by the call - confirm against Communicate).
  head_of_nodes = myComm->head_nodes(MPI_COMM_HEAD_OF_NODES);
}
/**
 * Parses the children of the given xml node and creates all non-executable objects.
 *
 * The children are scanned three times:
 *   1. "Project"/"project" blocks set project_title (attributes id/name) and m_series.
 *   2. "AFQMCInfo" blocks are parsed and stored in InfoMap.
 *   3. "Hamiltonian", "Wavefunction", "WalkerSet" and "Propagator" blocks are built,
 *      parsed, stored in their maps by name and given a copy of the AFQMCInfo they
 *      reference through their "info" attribute (default "info0").
 *
 * @param cur root xml node whose children are scanned
 * @return false if cur is NULL, a block fails to parse, a block name is defined twice,
 *         a type is unknown or a referenced info block does not exist; true otherwise.
 */
bool AFQMCFactory::parse(xmlNodePtr cur)
{
  /* Notes:
   *
   * 1. I don't need myComm in most classes, right now only for abort purposes. Only walker handler should need MPI
   * 2. right now hard-coding with SlaterDetWalker, later on make everything templated based on walker type.
   *
   */
  if(cur == NULL)
    return false;

  xmlNodePtr curRoot=cur;

  // NOTE(review): the maps hold raw pointers and are cleared without deleting
  // the objects; ownership is not visible here, so this is left unchanged.
  WalkerMap.clear();
  HamMap.clear();
  EstimatorMap.clear();
  PropMap.clear();
  WfnMap.clear();
  InfoMap.clear();

  app_log()<<" name: " <<cur->name <<std::endl;

  // Single-lookup access to a parsed AFQMCInfo block; reports the error and
  // returns NULL when the name is unknown.
  auto findInfo = [this](const std::string& id) -> AFQMCInfo* {
    std::map<std::string,AFQMCInfo*>::iterator it = InfoMap.find(id);
    if(it == InfoMap.end()) {
      app_error()<<"ERROR: Undefined info:" <<id <<" \n";
      return NULL;
    }
    return it->second;
  };

  // pass 1: project title and series
  for(cur = curRoot->children; cur != NULL; cur = cur->next) {
    std::string cname((const char*)(cur->name));
    if(cname =="Project" || cname =="project") {
      OhmmsAttributeSet oAttrib;
      oAttrib.add(project_title,"id");
      oAttrib.add(project_title,"name");
      oAttrib.add(m_series,"series");
      oAttrib.put(cur);
    }
  }

  // pass 2: first look only for AFQMCInfo
  for(cur = curRoot->children; cur != NULL; cur = cur->next) {
    std::string cname((const char*)(cur->name));
    if(cname != "AFQMCInfo")
      continue;
    AFQMCInfo* info = new AFQMCInfo();
    if(!info->parse(cur)) {
      app_error()<<"Error in AFQMCInfo::parse(xmlNodePtr)." <<std::endl;
      delete info; // not stored anywhere yet, so release it here
      return false;
    }
    std::pair<std::map<std::string,AFQMCInfo*>::iterator,bool> ret;
    ret = InfoMap.insert ( std::pair<std::string,AFQMCInfo*>(info->name,info) );
    if (ret.second==false) {
      app_error()<<"ERROR: AFQMCInfo xml-block already defined: " <<info->name <<std::endl;
      delete info; // the map kept the earlier block; this one is unreferenced
      return false;
    }
  }

  // pass 3: now look for non-executable blocks
  // NOTE(review): objects created below are not deleted on the error paths;
  // most are held through base-class pointers whose destructors are not
  // visible here - confirm they are virtual before adding deletes.
  for(cur = curRoot->children; cur != NULL; cur = cur->next) {
    std::string cname((const char*)(cur->name));

    if(cname =="Hamiltonian") {
      // building it right here since there is only 1 option
      // make a builder class that returns the pointer to the created object if necessary later
      HamiltonianBase* obj = (HamiltonianBase*) new SparseGeneralHamiltonian(myComm);
      obj->setHeadComm(head_of_nodes,MPI_COMM_HEAD_OF_NODES);
      if(!obj->parse(cur)) {
        app_error()<<"Error in SparseGeneralHamiltonian::parse(xmlNodePtr)." <<std::endl;
        return false;
      }
      std::pair<std::map<std::string,HamiltonianBase*>::iterator,bool> ret;
      ret = HamMap.insert ( std::pair<std::string,HamiltonianBase*>(obj->name,obj) );
      if (ret.second==false) {
        app_error()<<"ERROR: HamiltonianBase xml-block already defined: " <<obj->name <<std::endl;
        return false;
      }
      std::string info("info0");
      OhmmsAttributeSet oAttrib;
      oAttrib.add(info,"info");
      oAttrib.put(cur);
      AFQMCInfo* infoPtr = findInfo(info);
      if(infoPtr == NULL)
        return false;
      obj->copyInfo(*infoPtr);

    } else if(cname == "Wavefunction") {
      WavefunctionHandler* obj = new WavefunctionHandler(myComm);
      obj->setHeadComm(head_of_nodes,MPI_COMM_HEAD_OF_NODES);
      if(!obj->parse(cur)) {
        app_error()<<"Error in WavefunctionHandler::parse(xmlNodePtr)." <<std::endl;
        return false;
      }
      std::pair<std::map<std::string,WavefunctionHandler*>::iterator,bool> ret;
      ret = WfnMap.insert ( std::pair<std::string,WavefunctionHandler*>(obj->name,obj) );
      if (ret.second==false) {
        app_error()<<"ERROR: WavefunctionBase xml-block already defined: " <<obj->name <<std::endl;
        return false;
      }
      std::string info("info0");
      OhmmsAttributeSet oAttrib;
      oAttrib.add(info,"info");
      oAttrib.put(cur);
      AFQMCInfo* infoPtr = findInfo(info);
      if(infoPtr == NULL)
        return false;
      obj->copyInfo(*infoPtr);

    } else if(cname == "WalkerSet") {
      std::string type("local");
      std::string info("info0");
      OhmmsAttributeSet oAttrib;
      oAttrib.add(info,"info");
      oAttrib.add(type,"type");
      oAttrib.put(cur);
      WalkerHandlerBase* obj = NULL;
      if(type == "distributed" || type == "dist")
        obj = (WalkerHandlerBase*) new DistWalkerHandler(myComm);
      else if(type=="local")
        obj = (WalkerHandlerBase*) new LocalWalkerHandler(myComm);
      else {
        app_error()<<"Unknown WalkerSet type: " <<type <<std::endl;
        return false;
      }
      if(!obj->parse(cur)) {
        app_error()<<"Error in WalkerHandler::parse(xmlNodePtr)." <<std::endl;
        return false;
      }
      std::pair<std::map<std::string,WalkerHandlerBase*>::iterator,bool> ret;
      ret = WalkerMap.insert ( std::pair<std::string,WalkerHandlerBase*>(obj->name,obj) );
      if (ret.second==false) {
        app_error()<<"ERROR: WalkerHandler xml-block already defined: " <<obj->name <<std::endl;
        return false;
      }
      AFQMCInfo* infoPtr = findInfo(info);
      if(infoPtr == NULL)
        return false;
      obj->copyInfo(*infoPtr);

    } else if(cname == "Propagator") {
      std::string type("afqmc");
      std::string info("info0");
      OhmmsAttributeSet oAttrib;
      oAttrib.add(info,"info");
      oAttrib.add(type,"type");
      oAttrib.put(cur);
      PropagatorBase* obj = NULL;
      if(type == "afqmc")
        obj = (PropagatorBase*) new phaseless_ImpSamp_ForceBias(myComm,RandomNumberControl::Children[0]);
      else if(type == "vmc")
        obj = (PropagatorBase*) new VMCPropagator(myComm,RandomNumberControl::Children[0]);
      else {
        app_error()<<"Unknown propagator type: " <<type <<std::endl;
        return false;
      }
      obj->setHeadComm(head_of_nodes,MPI_COMM_HEAD_OF_NODES);
      if(!obj->parse(cur)) {
        // The propagator may be either implementation, so report the requested type
        // instead of always naming phaseless_ImpSamp_ForceBias.
        app_error()<<"Error in Propagator::parse(xmlNodePtr) for type: " <<type <<std::endl;
        return false;
      }
      std::pair<std::map<std::string,PropagatorBase*>::iterator,bool> ret;
      ret = PropMap.insert ( std::pair<std::string,PropagatorBase*>(obj->name,obj) );
      if (ret.second==false) {
        app_error()<<"ERROR: PropagatorBase xml-block already defined: " <<obj->name <<std::endl;
        return false;
      }
      AFQMCInfo* infoPtr = findInfo(info);
      if(infoPtr == NULL)
        return false;
      obj->copyInfo(*infoPtr);
    }
  }

  return true;
}
/**
 * Runs every "execute" child of the given xml node, in document order.
 *
 * For each block a driver is created according to the "type" attribute
 * ("afqmc" by default, "vmc", or one of the selected-CI spellings), the objects
 * named by the wset/ham/wfn/prop/info attributes are looked up in the maps
 * filled by parse(), and the driver is parsed, set up, run and cleared.
 * m_series is incremented after each successful block.
 *
 * @param cur root xml node whose children are scanned
 * @return false if cur is NULL, the driver type is unknown, the info block is
 *         undefined or any driver stage reports failure; true otherwise.
 */
bool AFQMCFactory::execute(xmlNodePtr cur)
{
  if(cur == NULL)
    return false;

  int groupid=myComm->getGroupID();
  char fileroot[256];
  bool no_gtag= (qmc_common.mpi_groups==1);

  xmlNodePtr curRoot=cur;
  for(cur = curRoot->children; cur != NULL; cur = cur->next) {
    std::string cname((const char*)(cur->name));
    if(cname != "execute")
      continue;

    // project_title comes from the input file and has no length limit, so the
    // write is bounded by the buffer size (an over-long name is truncated
    // rather than overflowing fileroot).
    if(no_gtag) //qnproc_g == nproc)
      snprintf(fileroot,sizeof(fileroot),"%s.s%03d",project_title.c_str(),m_series);
    else
      snprintf(fileroot,sizeof(fileroot),"%s.g%03d.s%03d",project_title.c_str(),groupid,m_series);

    //set the communicator name
    myComm->setName(fileroot);

    // NOTE(review): the driver is never deleted; whether Driver has a virtual
    // destructor is not visible here - confirm before adding a delete.
    Driver* driver = NULL;

    // check that necessary objects exist
    std::string type("afqmc");
    std::string ham("ham0");
    std::string wfn("wfn0");
    std::string wset("wset0");
    std::string prop("prop0");
    std::string info("info0");
    std::string paral("no"); // read from the "distributed" attribute but not used below
    OhmmsAttributeSet oAttrib;
    oAttrib.add(info,"info");
    oAttrib.add(prop,"prop");
    oAttrib.add(wset,"wset");
    oAttrib.add(wfn,"wfn");
    oAttrib.add(ham,"ham");
    oAttrib.add(type,"type");
    oAttrib.add(paral,"distributed");
    oAttrib.put(cur);

    if(type == "afqmc")
      driver = (Driver*) new AFQMCDriver(myComm);
    else if(type == "vmc")
      driver = (Driver*) new VMCDriver(myComm);
    else if(type == "selectedCI" || type == "selectedci" || type == "selCI" || type == "selci")
      driver = (Driver*) new selectedCI(myComm);
    else {
      app_error()<<"Unknown execute driver: " <<type <<std::endl;
      return false;
    }
    driver->setHeadComm(head_of_nodes,MPI_COMM_HEAD_OF_NODES);

    // Objects that are not found stay NULL and are passed to setup() as such.
    WalkerHandlerBase* wlkh0=NULL;
    HamiltonianBase* h0=NULL;
    PropagatorBase* p0=NULL;
    WavefunctionHandler* wfh0=NULL;

    if(InfoMap.find(info) == InfoMap.end()) {
      app_error()<<"ERROR: Undefined info in execute block. \n";
      return false;
    }
    if(WalkerMap.find(wset) != WalkerMap.end()) wlkh0 = WalkerMap[wset];
    if(HamMap.find(ham) != HamMap.end()) h0 = HamMap[ham];
    if(PropMap.find(prop) != PropMap.end()) p0 = PropMap[prop];
    if(WfnMap.find(wfn) != WfnMap.end()) wfh0 = WfnMap[wfn];

    driver->copyInfo(*InfoMap[info]);

    if(!driver->parse(cur)) {
      app_error()<<"Error in AFQMCDriver::parse(xmlNodePtr)." <<std::endl;
      return false;
    }
    if(!driver->setup(h0,wlkh0,p0,wfh0)) {
      app_error()<<"Error in AFQMCDriver::setup(...)." <<std::endl;
      return false;
    }
    // execute driver
    if(!driver->run()) {
      app_error()<<"Error in AFQMCDriver::run()" <<std::endl;
      return false;
    }
    if(!driver->clear()) {
      app_error()<<"Error in AFQMCDriver::clear()." <<std::endl;
      return false;
    }
    m_series++;
  }
  return true;
}
}
#else
// in case no AFQMC is compiled
#include"AFQMC/AFQMCFactory.h"
#include<iostream>
namespace qmcplusplus
{

/// Stub constructor used when AFQMC is not compiled in: only the base class is set up.
AFQMCFactory::AFQMCFactory(Communicate* c)
  : MPIObjectBase(c)
{
}

/// Stub: reports the missing AFQMC support on stderr and fails.
bool AFQMCFactory::parse(xmlNodePtr)
{
  std::cerr<<"Executable not compiled with AFQMC support. \n";
  return false;
}

/// Stub: reports the missing AFQMC support on stderr and fails.
bool AFQMCFactory::execute(xmlNodePtr)
{
  std::cerr<<"Executable not compiled with AFQMC support. \n";
  return false;
}

}
#endif

124
src/AFQMC/AFQMCFactory.h Executable file
View File

@ -0,0 +1,124 @@
// -*- C++ -*-
/**@file AFQMCFactory.h
* @brief Top level class for AFQMC. Parses input and performs setup of classes.
*/
#ifndef QMCPLUSPLUS_AFQMCFACTORY_H
#define QMCPLUSPLUS_AFQMCFACTORY_H
//#ifdef AFQMC
#if 1>0
#include<string>
#include<vector>
#include<map>
#include <queue>
#include<algorithm>
#include<Message/MPIObjectBase.h>
#include "OhmmsApp/RandomNumberControl.h"
#include "config.h"
#include<AFQMC/Drivers/Driver.h>
#include<AFQMC/Walkers/WalkerHandlerBase.h>
#include<AFQMC/Hamiltonians/HamiltonianBase.h>
#include<AFQMC/Estimators/EstimatorHandler.h>
#include<AFQMC/Propagators/PropagatorBase.h>
#include<AFQMC/Wavefunctions/WavefunctionHandler.h>
#include "OhmmsData/libxmldefs.h"
namespace qmcplusplus
{
/**
 * Top level AFQMC class: parses the xml input, stores the created objects in
 * name-keyed maps and runs the executable (driver) sections.
 */
class AFQMCFactory: public MPIObjectBase
{
public:
///constructor: takes the communicator and a reference to the random number controller
AFQMCFactory(Communicate* c, RandomNumberControl&);
///destructor (empty: the objects stored by pointer in the maps are not deleted here)
~AFQMCFactory() {}
/*
* Parses xml input and creates all non-executable objects.
* Created objects (pointers actually) are stored in maps based on name in xml block.
* Executable sections (drivers) are created with objects already existing
* in the maps.
*/
bool parse(xmlNodePtr cur);
/*
* Parses xml input and creates executable sections, using objects created during parsing.
*/
bool execute(xmlNodePtr cur);
private:
// series index: read from the project block, incremented after each executed block
int m_series;
// project title: read from the "id"/"name" attributes of the project block
std::string project_title;
// value returned by head_nodes() in the constructor
bool head_of_nodes;
// communicator handed to head_nodes() and forwarded to the created objects
MPI_Comm MPI_COMM_HEAD_OF_NODES;
// container of AFQMCInfo objects
std::map<std::string,AFQMCInfo*> InfoMap;
// container of walker handlers
std::map<std::string,WalkerHandlerBase*> WalkerMap;
// container of hamiltonians
std::map<std::string,HamiltonianBase*> HamMap;
// container of estimators
std::map<std::string,EstimatorHandler*> EstimatorMap;
// container of propagators
std::map<std::string,PropagatorBase*> PropMap;
// container of wavefunctions
std::map<std::string,WavefunctionHandler*> WfnMap;
///random number controller
RandomNumberControl& myRandomControl;
};
}
#else
namespace qmcplusplus
{
/**
 * Placeholder AFQMCFactory declared when AFQMC support is not compiled in.
 * It keeps the parse()/execute() interface but takes only a communicator.
 */
class AFQMCFactory: public MPIObjectBase
{
public:
///constructor
AFQMCFactory(Communicate* c);
///destructor
~AFQMCFactory() {}
/*
* Parses xml input and creates all non-executable objects.
* Created objects (pointers actually) are stored in maps based on name in xml block.
* Executable sections (drivers) are created with objects already existing
* in the maps.
*/
bool parse(xmlNodePtr cur);
/*
* Parses xml input and creates executable sections, using objects created during parsing.
*/
bool execute(xmlNodePtr cur);
};
}
#endif
#endif

33
src/AFQMC/CMakeLists.txt Executable file
View File

@ -0,0 +1,33 @@
SET(AFQMC_DEBUG 3)
#-------------------------------------------------------------------
# Sources
#-------------------------------------------------------------------
SET (AFQMC_SRCS
  AFQMCFactory.cpp
  Walkers/LocalWalkerHandler.cpp
  Walkers/DistWalkerHandler.cpp
  Drivers/AFQMCDriver.cpp
  Drivers/selectedCI.cpp
  Drivers/VMCDriver.cpp
  Hamiltonians/SparseGeneralHamiltonian.cpp
  Wavefunctions/WavefunctionHandler.cpp
  Wavefunctions/PureSingleDeterminant.cpp
  Wavefunctions/MultiPureSingleDeterminant.cpp
  Wavefunctions/GeneralSingleDeterminant.cpp
  #Wavefunctions/MultiGeneralSingleDeterminant.cpp
  Propagators/phaseless_ImpSamp_ForceBias.cpp
  Propagators/VMCPropagator.cpp
  Numerics/SparseMatrixOperations.cpp
  Numerics/DenseMatrixOperations.cpp
  Sandbox/compare_libraries.cpp
  Wavefunctions/WavefunctionHelper.cpp
  Hamiltonians/DDProjector.cpp
  Hamiltonians/CCProjector.cpp
  Utilities/Utils.cpp
)

#-------------------------------------------------------------------
# Eigen headers
#-------------------------------------------------------------------
# The include path used to be hard-coded to one machine's install. It is now
# a cache entry with the same default, so it can be overridden with
# -DAFQMC_EIGEN_INCLUDE_DIR=<path> on other systems.
SET(AFQMC_EIGEN_INCLUDE_DIR "/usr/gapps/qmc/libs/INTEL/eigen-eigen-10219c95fe65/"
    CACHE PATH "Directory containing the Eigen headers used by AFQMC")
IF(NOT EXISTS "${AFQMC_EIGEN_INCLUDE_DIR}")
  MESSAGE(WARNING "AFQMC_EIGEN_INCLUDE_DIR does not exist: ${AFQMC_EIGEN_INCLUDE_DIR}")
ENDIF()
INCLUDE_DIRECTORIES("${AFQMC_EIGEN_INCLUDE_DIR}")

ADD_LIBRARY(afqmc ${AFQMC_SRCS})

486
src/AFQMC/Drivers/AFQMCDriver.cpp Executable file
View File

@ -0,0 +1,486 @@
#include<tuple>
#include<map>
#include<string>
#include<iomanip>
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include<Message/MPIObjectBase.h>
#include "Message/OpenMP.h"
#include "Message/Communicate.h"
#include "Message/CommOperators.h"
#include "OhmmsData/libxmldefs.h"
#include "Configuration.h"
#include <qmc_common.h>
#include "AFQMC/config.h"
#include "AFQMC/Drivers/AFQMCDriver.h"
namespace qmcplusplus {
/**
 * Main AFQMC loop: nBlock blocks of nStep steps of nSubstep propagation substeps.
 *
 * After each step the estimators are accumulated, population control, load
 * balancing and orthogonalization are applied at their configured periods,
 * and Etav/Eshift are updated from the step energy. After each block the
 * checkpoint and sample files are written at their periods and the estimators
 * are printed. If compare_libraries is set, only the propagator benchmark runs.
 *
 * @return false if a periodic checkpoint or sample write fails, true otherwise.
 */
bool AFQMCDriver::run()
{
  if(compare_libraries)
  {
    prop0->benchmark();
    return true;
  }

  if(!restarted) {
    Eshift=wlkBucket->getEloc(0).real();
    Etav=wlkBucket->getEloc(0).real();
    step0=block0=0;
  }

  RealType w0 = wlkBucket->GlobalWeight();
  int nwalk_ini = wlkBucket->GlobalPopulation();
  estim0->setTargetWeight(w0);
  app_log()<<"Initial weight and number of walkers: " <<w0 <<" " <<nwalk_ini <<std::endl;

  std::ofstream out_timers;
  if(print_timers > 0 && myComm->rank()==0) {
    out_timers.open("timers.dat");//,std::ios_base::app | std::ios_base::out);
  }

  // problems with using step_tot to do ortho and load balance
  int time = step0*nSubstep, step_tot=step0, iBlock ;
  for (iBlock=block0; iBlock<nBlock; ++iBlock) {

    LocalTimer.start("Block::TOTAL");
    for (int iStep=0; iStep<nStep; ++iStep, ++step_tot) {

      // propagate
      for (int iSubstep=0; iSubstep<nSubstep; ++iSubstep,++time) {
        LocalTimer.start("SubStep::Propagate");
        // NOTE(review): Eshift is passed for both trailing arguments - confirm
        // the second one is not meant to be Etav.
        prop0->Propagate(time,wlkBucket,Eshift,Eshift);
        LocalTimer.stop("SubStep::Propagate");
        estim0->accumulate_substep(wlkBucket);
      } // iSubstep

      // quantities that are measured once per step
      estim0->accumulate_step(wlkBucket);

      // The periods come from the input; a period of 0 would make the modulo
      // below undefined, so a zero period now simply disables the operation.
      if (nPopulationControl != 0 && step_tot != 0 && step_tot % nPopulationControl == 0) {
        LocalTimer.start("Step::PopControl");
        wlkBucket->popControl();
        LocalTimer.stop("Step::PopControl");
      }
      if (nloadBalance != 0 && step_tot != 0 && step_tot % nloadBalance == 0) {
        LocalTimer.start("Step::loadBalance");
        wlkBucket->loadBalance();
        LocalTimer.stop("Step::loadBalance");
      }
      if (nStabalize != 0 && step_tot != 0 && step_tot % nStabalize == 0) { // && it->alive) {
        LocalTimer.start("Step::Orthogonalize");
        wlkBucket->Orthogonalize();
        wfn0->evaluateOverlap("ImportanceSampling",-1,wlkBucket);
        LocalTimer.stop("Step::Orthogonalize");
      }

      // During the first unit of imaginary time the energies follow the step
      // estimate directly; afterwards they relax towards it with rate dShift
      // (Etav ten times more slowly than Eshift).
      //Etav += estim0->getEloc_step();
      if(time*dt < 1.0)
        Etav = estim0->getEloc_step();
      else
        Etav += dShift*0.1*(estim0->getEloc_step()-Etav);
      if(time*dt < 1.0)
        Eshift = estim0->getEloc_step();
      else
        Eshift += dShift*(estim0->getEloc_step()-Eshift);
    }

    // checkpoint
    if(nCheckpoint > 0 && iBlock != 0 && iBlock % nCheckpoint == 0)
      if(!checkpoint(iBlock,step_tot)) {
        app_error()<<" Error in AFQMCDriver::checkpoint(). \n" <<std::endl;
        return false;
      }

    // write samples
    if(samplePeriod > 0 && iBlock != 0 && iBlock % samplePeriod == 0)
      if(!writeSamples()) {
        app_error()<<" Error in AFQMCDriver::writeSamples(). \n" <<std::endl;
        return false;
      }

    // quantities that are measured once per block
    estim0->accumulate_block(wlkBucket);
    LocalTimer.stop("Block::TOTAL");
    estim0->print(iBlock+1,time*dt,Eshift,Etav,wlkBucket);
    if(print_timers > 0 && myComm->rank()==0 && iBlock%print_timers == 0) output_timers(out_timers,iBlock);
  }

  // Final checkpoint at the end of the run.
  // NOTE(review): the return value is ignored here, unlike the periodic
  // checkpoints above - confirm whether a failure should be reported.
  checkpoint(iBlock,step_tot);

  app_log()<<"----------------------------------------------------------------\n";
  app_log()<<" LocalTimer: \n";
  LocalTimer.print_average_all(app_log());
  app_log()<<" Timer: \n";
  Timer.print_average_all(app_log());
  app_log()<<"----------------------------------------------------------------\n";
  return true;
}
/**
 * Reads the driver parameters from the given xml node and creates the
 * estimator handler.
 *
 * Several parameters accept more than one spelling (e.g. "dt"/"timestep",
 * "timers"/"timer"/"print_timers"/"print_timer", "ncores_per_TG"/"ncores"/"cores").
 * min_total_weight is clamped to the interval [0.1, 1.0] after reading.
 *
 * @param cur xml node of the execute block
 * @return false if cur is NULL, true otherwise.
 */
bool AFQMCDriver::parse(xmlNodePtr cur)
{
  if(cur==NULL)
    return false;

  // Local receivers for options that are translated into flags afterwards.
  std::string targetOption;
  int testLibraryFlag=0;
  int debugFlag=0;
  ncores_per_TG=1;

  ParameterSet m_param;
  // loop structure and periods
  m_param.add(nBlock,"blocks","int");
  m_param.add(nStep,"steps","int");
  m_param.add(nSubstep,"substeps","int");
  m_param.add(nPopulationControl,"popControl","int");
  m_param.add(nWalkers,"nWalkers","int");
  m_param.add(nStabalize,"ortho","int");
  m_param.add(nloadBalance,"loadBalance","int");
  m_param.add(nCheckpoint,"checkpoint","int");
  m_param.add(samplePeriod,"samplePeriod","int");
  // timer output period (four accepted spellings)
  m_param.add(print_timers,"timers","int");
  m_param.add(print_timers,"timer","int");
  m_param.add(print_timers,"print_timers","int");
  m_param.add(print_timers,"print_timer","int");
  // cores per task group (three accepted spellings)
  m_param.add(ncores_per_TG,"ncores_per_TG","int");
  m_param.add(ncores_per_TG,"ncores","int");
  m_param.add(ncores_per_TG,"cores","int");
  // time step and energy shift rate
  m_param.add(dt,"dt","double");
  m_param.add(dt,"timestep","double");
  m_param.add(dShift,"dshift","double");
  // testing/debugging switches
  m_param.add(testLibraryFlag,"test_library","int");
  m_param.add(debugFlag,"debug","int");
  m_param.add(min_total_weight,"min_total_weight","double");
  m_param.add(targetOption,"set_nWalker_to_target","std::string");
  m_param.add(targetOption,"set_nwalker_to_target","std::string");
  // restart files and tags
  m_param.add(hdf_read_tag,"hdf_read_tag","std::string");
  m_param.add(hdf_read_restart,"hdf_read_file","std::string");
  m_param.add(hdf_write_tag,"hdf_write_tag","std::string");
  m_param.add(hdf_write_restart,"hdf_write_file","std::string");
  m_param.put(cur);

  // keep the minimum total weight inside [0.1, 1.0]
  min_total_weight = std::max( std::min(min_total_weight , 1.0), 0.1 );

  // the integer switches can only turn the flags on
  if(testLibraryFlag != 0)
    compare_libraries=true;
  if(debugFlag != 0)
    debug=true;

  // case-insensitive "yes"/"true" enables the walker-target option
  for(std::string::iterator ch=targetOption.begin(); ch!=targetOption.end(); ++ch)
    *ch = tolower(*ch);
  if(targetOption == "yes" || targetOption == "true")
    set_nWalker_target = true;

  // the estimator handler gets a copy of this driver's info and parses the same node
  estim0 = new EstimatorHandler(myComm);
  estim0->copyInfo(*this);
  estim0->parse(cur);

  return true;
}
// Wires the driver to its Hamiltonian, walker set, propagator and wavefunction
// and initializes each of them in turn.
//
// h0  : Hamiltonian handler, stored in ham0
// w0  : walker handler, stored in wlkBucket
// p0  : propagator, stored in prop0
// wf0 : wavefunction handler, stored in wfn0
//
// Returns false when the Hamiltonian, wavefunction or propagator report an
// initialization failure, true otherwise. The function ends with a barrier on
// myComm, and it calls split_comm/bcast on myComm along the way, so it is
// presumably meant to be entered by every rank of myComm -- TODO confirm.
// estim0 is dereferenced here without being created; it is allocated in
// AFQMCDriver::parse(), so parse() has to run first.
bool AFQMCDriver::setup(HamPtr h0, WSetPtr w0, PropPtr p0, WfnPtr wf0)
{
ham0=h0;
wlkBucket=w0;
prop0=p0;
wfn0=wf0;
restarted=false;
// temporary
/*
localwlkBucket = dynamic_cast<LocalWalkerHandler*>(wlkBucket);
if(!localwlkBucket) {
app_error()<<" Error in AFQMCDriver::setup() \n"
<<" Conversion to LocalWalkerHandler was unsuccessful. \n\n";
return false;
}
*/
app_log()<<"\n****************************************************\n"
<<"****************************************************\n"
<<"****************************************************\n"
<<" Beginning Driver initialization.\n"
<<"****************************************************\n"
<<"****************************************************\n"
<<"****************************************************\n"
<<std::endl;
app_log()<<" Using " <<ncores_per_TG <<" cores per node in a TaskGroup. \n";
// right now this TG is not used. It is needed for setup purposes and to
// get a unique TG number for every group of cores on a node (used in the WalkerSet)
TG.setup(ncores_per_TG,1,false);
std::vector<int> TGdata(5);
TG.getSetupInfo(TGdata);
// setup local-to-node MPI Comm
// TGdata[0]: node_number
myComm->split_comm(TGdata[0],MPI_COMM_NODE_LOCAL);
TG.setNodeCommLocal(MPI_COMM_NODE_LOCAL);
// communicator grouping the ranks that share a task-group number
int key = TG.getTGNumber(); // This works because the TG used has nnodes_per_TG=1
myComm->split_comm(key,MPI_COMM_TG_LOCAL);
TG.setTGCommLocal(MPI_COMM_TG_LOCAL);
// communicator grouping the ranks that share a core rank across task groups
key = TG.getCoreRank();
myComm->split_comm(key,MPI_COMM_TG_LOCAL_HEADS);
// the buffer is set up with a flag telling whether this rank has core rank 0
CommBuffer.setup(TG.getCoreRank()==0,std::string("COMMBuffer_")+std::to_string(TG.getTGNumber()),MPI_COMM_TG_LOCAL);
TG.setBuffer(&CommBuffer);
// Only rank 0 opens the restart file and reads the driver state from it.
hdf_archive read(myComm);
if(myComm->rank() == 0) {
if(hdf_read_restart != std::string("")) {
if(read.open(hdf_read_restart,H5F_ACC_RDONLY,false))
restarted = restart(read);
if(!restarted) {
read.close();
app_log()<<" WARNING: Problems restarting simulation. Starting from default settings. \n";
}
}
}
// share the outcome of the restart attempt, then the restored driver state
myComm->bcast(restarted);
if(restarted) {
app_log()<<" Restarted from file. Block, step: " <<block0 <<" " <<step0 <<std::endl;
app_log()<<" Eshift, Etav: " <<Eshift <<" " <<Etav <<std::endl;
myComm->bcast(Eshift);
myComm->bcast(Etav);
myComm->bcast(block0);
myComm->bcast(step0);
}
app_log()<<"\n****************************************************\n"
<<" Initializating Hamiltonian \n"
<<"****************************************************\n"
<<std::endl;
// hamiltonian
if(!ham0->init(TGdata,&CommBuffer,MPI_COMM_TG_LOCAL,MPI_COMM_NODE_LOCAL)) {
app_error()<<"Error initializing Hamiltonian in AFQMCDriver::setup" <<std::endl;
return false;
}
app_log()<<"\n****************************************************\n"
<<" Initializating Wavefunction \n"
<<"****************************************************\n"
<<std::endl;
// wavefunction: init() receives the restart archive and tag, setup() receives the Hamiltonian
if(!wfn0->init(TGdata,&CommBuffer,read,hdf_read_tag,MPI_COMM_TG_LOCAL,MPI_COMM_NODE_LOCAL)) {
app_error()<<"Error initializing Wavefunction in AFQMCDriver::setup" <<std::endl;
return false;
}
if(!wfn0->setup(ham0)) {
app_error()<<"Error in WavefunctionHandler::setup in AFQMCDriver::setup" <<std::endl;
return false;
}
app_log()<<"\n****************************************************\n"
<<" Initializating Walker Handler \n"
<<"****************************************************\n"
<<std::endl;
// walker set
wlkBucket->setup(TG.getCoreRank(),ncores_per_TG,TG.getTGNumber(),MPI_COMM_TG_LOCAL_HEADS,MPI_COMM_TG_LOCAL,MPI_COMM_NODE_LOCAL,&LocalTimer);
wlkBucket->setHF(wfn0->getHF());
// walkers come either from the restart archive or from a fresh initialization;
// in both cases the wavefunction evaluation is run on the walker set afterwards
if(restarted) {
wlkBucket->restartFromHDF5(nWalkers,read,hdf_read_tag,set_nWalker_target);
app_log()<<"Number of walkers after restart: " <<wlkBucket->GlobalPopulation() <<std::endl;
wfn0->evaluateLocalEnergyAndOverlap("ImportanceSampling",-1,wlkBucket);
} else {
wlkBucket->initWalkers(nWalkers);
wfn0->evaluateLocalEnergyAndOverlap("ImportanceSampling",-1,wlkBucket);
}
app_log()<<"\n****************************************************\n"
<<" Initializating Propagator \n"
<<"****************************************************\n"
<<std::endl;
// propagator
if(!prop0->setup(TGdata,&CommBuffer,ham0,wfn0,dt,read,hdf_read_tag,MPI_COMM_TG_LOCAL,MPI_COMM_NODE_LOCAL)) {
app_error()<<"Error in PropagatorBase::setup in AFQMCDriver::setup" <<std::endl;
return false;
}
// NOTE(review): close() is called on rank 0 even when no restart file was
// opened or after the failed-restart branch above already closed it --
// presumably hdf_archive::close() tolerates that; confirm.
if(myComm->rank() == 0)
read.close();
app_log()<<"\n****************************************************\n"
<<" Initializating Estimators \n"
<<"****************************************************\n"
<<std::endl;
// estimator setup
estim0->setup(TGdata,&CommBuffer,ham0,wfn0,&LocalTimer,MPI_COMM_HEAD_OF_NODES,MPI_COMM_TG_LOCAL,MPI_COMM_NODE_LOCAL,MPI_COMM_TG_LOCAL_HEADS);
app_log()<<"\n****************************************************\n"
<<"****************************************************\n"
<<"****************************************************\n"
<<" Finished Driver initialization.\n"
<<"****************************************************\n"
<<"****************************************************\n"
<<"****************************************************\n"
<<std::endl;
myComm->barrier();
return true;
}
// Writes a checkpoint file.
//
// block, step : counters stored in the file as "DriverInts"; Eshift and Etav
//               are stored alongside them as "DriverReals".
//
// Rank 0 creates the file (hdf_write_restart when set, otherwise
// "<communicator name>.chk.h5") and writes the driver data under the
// "AFQMCDriver" group, nested under hdf_write_tag when that tag is non-empty.
// The walker handler is then asked to dump its walkers into the same archive.
// Returns false when the file cannot be created or the walker dump fails.
//
// NOTE(review): a failed create() makes rank 0 return before
// wlkBucket->dumpToHDF5() while the other ranks still call it; if that call is
// collective this can leave them waiting -- confirm.
bool AFQMCDriver::checkpoint(int block, int step)
{
  hdf_archive dump(myComm,false);
  if(myComm->rank() == 0) {
    std::string file;
    if(hdf_write_restart != std::string(""))
      file = hdf_write_restart;
    else
      file = myComm->getName()+std::string(".chk.h5");
    if(!dump.create(file)) {
      app_error()<<" Error opening checkpoint file for write. \n";
      return false;
    }
    std::vector<RealType> Rdata(2);
    Rdata[0]=Eshift;
    Rdata[1]=Etav;
    std::vector<IndexType> Idata(2);
    Idata[0]=block;
    Idata[1]=step;
    // always write driver data and walkers
    dump.push("AFQMCDriver");
    if(hdf_write_tag != std::string("")) dump.push(hdf_write_tag);
    dump.write(Idata,"DriverInts");
    dump.write(Rdata,"DriverReals");
    if(hdf_write_tag != std::string("")) dump.pop();
    dump.pop();
  }
  if(!wlkBucket->dumpToHDF5(dump,hdf_write_tag) ) {
    app_error()<<" Problems writting checkpoint file in Driver/AFQMCDriver::checkpoint(). \n";
    return false;
  }
  if(myComm->rank() == 0) {
    dump.close();
  }
  return true;
}
// writes samples
bool AFQMCDriver::writeSamples()
{
hdf_archive dump(myComm,false);
if(myComm->rank() == 0) {
std::string file;
char fileroot[128];
int nproc = myComm->size();
file = myComm->getName()+std::string(".confg.h5");
if(!dump.create(file)) {
app_error()<<" Error opening checkpoint file for write. \n";
return false;
}
}
int nwtowrite=-1;
if(!wlkBucket->dumpSamplesHDF5(dump,nwtowrite) ) {
app_error()<<" Problems writting checkpoint file in Driver/AFQMCDriver::writeSample(). \n";
return false;
}
if(myComm->rank() == 0) {
dump.close();
}
return true;
}
// Restores the driver state from an already opened restart archive.
//
// Enters the "AFQMCDriver" group (and the hdf_read_tag sub-group when the tag
// is non-empty), reads "DriverInts" into block0/step0 and "DriverReals" into
// Eshift/Etav, then leaves the groups again.
// Returns false as soon as a group or dataset cannot be read; in that case the
// groups entered so far are not left again.
bool AFQMCDriver::restart(hdf_archive& read)
{
  const bool tagged = !hdf_read_tag.empty();

  if(!read.push("AFQMCDriver",false))
    return false;
  if(tagged && !read.push(hdf_read_tag,false))
    return false;

  std::vector<IndexType> counters(2);
  std::vector<RealType> energies(2);
  if(!read.read(counters,"DriverInts") || !read.read(energies,"DriverReals"))
    return false;

  Eshift = energies[0];
  Etav   = energies[1];
  block0 = counters[0];
  step0  = counters[1];

  if(tagged)
    read.pop();
  read.pop();
  return true;
}
// Currently a no-op: does nothing and always returns true.
bool AFQMCDriver::clear()
{
return true;
}
// Appends one row of timer averages to out_timers and resets those timers.
//
// out_timers : destination stream.
// n          : row index; when it is 0 a header row with the timer names is
//              written first.
//
// Header, data row and reset all iterate over the same name table, so the
// header always has one column per value (previously the header omitted
// Propagate::eloc2 and Propagate::eloc3 although their averages were printed).
void AFQMCDriver::output_timers(std::ofstream& out_timers, int n)
{
  static const char* const timer_names[] = {
    "Propagate::applyHSPropagator",
    "Propagate::calculateMixedMatrixElementOfOneBodyOperators",
    "Propagate::eloc",
    "Propagate::eloc2",
    "Propagate::eloc3",
    "Propagate::product_SD",
    "Propagate::sampleGaussianFields",
    "Propagate::apply_expvHS_Ohmms",
    "Propagate::build_vHS",
    "PureSingleDeterminant:calculateMixedMatrixElementOfOneBodyOperators",
    "PureSingleDeterminant:evaluateLocalEnergy",
    "PureSingleDeterminant:local_evaluateOneBodyMixedDensityMatrix"
  };
  const int ntimers = sizeof(timer_names)/sizeof(timer_names[0]);

  if(n==0) {
    // every name is followed by a blank, as in the original header format
    for(int i=0; i<ntimers; i++) out_timers<<timer_names[i] <<" ";
    out_timers<<std::endl;
  }

  for(int i=0; i<ntimers; i++) {
    if(i>0) out_timers<<" ";
    out_timers<<Timer.average(timer_names[i]);
  }
  out_timers<<std::endl;

  // reset only after the whole row has been written
  for(int i=0; i<ntimers; i++)
    Timer.reset(timer_names[i]);
}
}

85
src/AFQMC/Drivers/AFQMCDriver.h Executable file
View File

@ -0,0 +1,85 @@
#ifndef QMCPLUSPLUS_AFQMC_AFQMCDRIVER_H
#define QMCPLUSPLUS_AFQMC_AFQMCDRIVER_H
#include<Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "AFQMC/config.h"
#include "AFQMC/Drivers/Driver.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Walkers/LocalWalkerHandler.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
namespace qmcplusplus
{
// AFQMC driver: concrete Driver registered under the name "AFQMC".
// The member functions are defined in AFQMCDriver.cpp.
class AFQMCDriver: public Driver
{

  typedef HamiltonianBase* HamPtr;
  typedef WavefunctionHandler* WfnPtr;
  typedef PropagatorBase* PropPtr;
  typedef WalkerHandlerBase* WSetPtr;
  typedef AFQMCInfo* InfoPtr;

  public:

  // Every data member is given a defined value; the initializers are listed in
  // declaration order, which is the order in which they actually run.
  AFQMCDriver(Communicate *c):Driver(c),
      compare_libraries(false),debug(false),
      print_timers(0),samplePeriod(-1),
      accum_ovlp(false),set_nWalker_target(false),
      diagHam(0),diagHam_freq(10),
      dShift(1.0),min_total_weight(0.8),
      Eshift(0.0),Etav(0.0),
      localwlkBucket(nullptr)
  {
    name = "AFQMC";
    project_title = "afqmc";
  }

  ~AFQMCDriver() {}

  bool run();

  bool parse(xmlNodePtr);

  bool setup(HamPtr,WSetPtr,PropPtr,WfnPtr);

  // writes block/step counters, Eshift/Etav and the walkers to a checkpoint file
  bool checkpoint(int,int);

  // reads block/step counters and Eshift/Etav from an opened restart archive
  bool restart(hdf_archive&);

  bool clear();

  protected:

  bool writeSamples();

  bool compare_libraries;

  bool debug;

  int print_timers;

  int samplePeriod;

  bool accum_ovlp;

  // forwarded to the walker handler when walkers are restored from a restart file
  bool set_nWalker_target;

  int diagHam;
  int diagHam_freq;

  // RealType ovlp_cut;
  RealType dShift;
  RealType min_total_weight;

  // saved to and restored from checkpoint files ("DriverReals")
  RealType Eshift;
  RealType Etav;

  // temporary
  LocalWalkerHandler* localwlkBucket;

  void output_timers(std::ofstream&,int);

};
}
#endif

116
src/AFQMC/Drivers/Driver.h Executable file
View File

@ -0,0 +1,116 @@
#ifndef QMCPLUSPLUS_AFQMC_DRIVER_H
#define QMCPLUSPLUS_AFQMC_DRIVER_H
#include<Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "AFQMC/config.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Estimators/EstimatorHandler.h"
#include "AFQMC/Utilities/taskgroup.h"
namespace qmcplusplus
{
// Abstract base class of the AFQMC drivers.
// It holds the run parameters, the restart/checkpoint file settings, the
// pointers to the Hamiltonian, wavefunction, walker and propagator handlers
// and the MPI communicators used by the concrete drivers.
class Driver: public MPIObjectBase, public AFQMCInfo
{

  public:

  typedef HamiltonianBase* HamPtr;
  typedef WavefunctionHandler* WfnPtr;
  typedef PropagatorBase* PropPtr;
  typedef WalkerHandlerBase* WSetPtr;
  typedef AFQMCInfo* InfoPtr;

  // Initializers are listed in declaration order (the order in which they
  // run). Handler pointers start out null and head_of_nodes false, so no
  // member other than the MPI communicators is left indeterminate; the
  // communicators are assigned by setHeadComm() and the derived setup().
  Driver(Communicate *c):MPIObjectBase(c),
      name(""),m_series(0),project_title(""),
      hdf_read_restart(""),hdf_write_restart(""),
      hdf_read_tag(""),hdf_write_tag(""),
      restarted(false),
      nBlock(100),nStep(1),nSubstep(1),
      TG(c,"DriverTG"),ncores_per_TG(1),
      nWalkers(5),nCheckpoint(-1),
      nStabalize(1),nPopulationControl(1),nloadBalance(1),
      dt(0.01),block0(0),step0(0),
      ham0(nullptr),wfn0(nullptr),wlkBucket(nullptr),prop0(nullptr),
      estim0(nullptr),
      head_of_nodes(false)
  {}

  // virtual: concrete drivers may be destroyed through a Driver pointer
  virtual ~Driver() {}

  virtual bool run()=0;

  // sets the project title and the series counter
  void setTitle(std::string& title, int cnt) {
    project_title=title;
    m_series=cnt;
  }

  virtual bool parse(xmlNodePtr)=0;

  virtual bool setup(HamPtr,WSetPtr,PropPtr,WfnPtr)=0;

  virtual bool checkpoint(int,int)=0;

  virtual bool restart(hdf_archive&)=0;

  virtual bool clear()=0;

  // records the head-of-nodes flag and the matching communicator
  void setHeadComm(bool hd, MPI_Comm comm) {
    head_of_nodes=hd;
    MPI_COMM_HEAD_OF_NODES = comm;
  }

  std::string name;

  protected:

  int m_series;
  std::string project_title;

  myTimer LocalTimer;

  // restart (read) and checkpoint (write) file names and group tags
  std::string hdf_read_restart;
  std::string hdf_write_restart;
  std::string hdf_read_tag;
  std::string hdf_write_tag;

  bool restarted;

  int nBlock;
  int nStep;
  int nSubstep;

  TaskGroup TG;
  int ncores_per_TG;

  int nWalkers;

  int nCheckpoint;
  int nStabalize;
  int nPopulationControl;
  int nloadBalance;
  RealType dt;
  int block0, step0;

  HamPtr ham0;

  WfnPtr wfn0;

  WSetPtr wlkBucket;

  PropPtr prop0;

  EstimatorHandler* estim0;

  ComplexSMVector CommBuffer;

  bool head_of_nodes;
  MPI_Comm MPI_COMM_HEAD_OF_NODES;
  MPI_Comm MPI_COMM_NODE_LOCAL;
  MPI_Comm MPI_COMM_TG_LOCAL;
  MPI_Comm MPI_COMM_TG_LOCAL_HEADS;

};
}
#endif

297
src/AFQMC/Drivers/VMCDriver.cpp Executable file
View File

@ -0,0 +1,297 @@
#include<tuple>
#include<map>
#include<string>
#include<iomanip>
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include<Message/MPIObjectBase.h>
#include "Message/OpenMP.h"
#include "Message/Communicate.h"
#include "Message/CommOperators.h"
#include "OhmmsData/libxmldefs.h"
#include "Configuration.h"
#include <qmc_common.h>
#include "AFQMC/config.h"
#include "AFQMC/Drivers/VMCDriver.h"
#include "AFQMC/Estimators/SlaterDetOperations.h"
namespace qmcplusplus {
// Runs the VMC driver.
// The whole implementation below is commented out, so this function currently
// performs no work and always returns true.
bool VMCDriver::run()
{
/*
if(!restarted) {
step0=block0=0;
}
ComplexType enume=0.0,edeno=0.0;
std::vector<RealType> data(10);
RealType accept=0.0;
ComplexType exactEstimatorEnergy;
SlaterDetOperations SDetOps(myComm);
SDetOps.copyInfo(*this);
std::vector<RealType> diagEnergies(diagHam);
ComplexMatrix diagEigVec(1);
if(diagHam > 0 && myComm->size() > 1 ) {
app_error()<<" Error: Diagonalization of hamiltonian in space of walkers only implemented in serial. \n";
return false;
}
SDetOps.setup(ham0,&LocalTimer);
prop0->SDetOps = &SDetOps;
std::ofstream out("vmc.dat", std::ios_base::app | std::ios_base::out);
if(out.fail()) {
app_error()<<"Problems opening output file.\n";
return false;
}
out<<fixed;
if(!restarted) { // assume a field of zero
ComplexMatrix Mat;
for(WalkerHandler::WalkerIterator it=wlkBucket->begin(); it!=wlkBucket->end(); it++)
SDetOps.green_function((it->SlaterMat).data(),(it->SlaterMat).data()+2*NMO*NAEA,it->weight,Mat,false);
}
// problems with using step_tot to do ortho and load balance
int time = step0*nSubstep;
for (int iBlock=block0, step_tot=step0; iBlock<nBlock; ++iBlock) {
LocalTimer.start("Block::TOTAL");
enume=0.0;
edeno=0.0;
for (int iStep=0; iStep<nStep; ++iStep, ++step_tot) {
// propagate
for (int iSubstep=0; iSubstep<nSubstep; ++iSubstep,++time) {
for(WalkerHandler::WalkerIterator it=wlkBucket->begin(); it!=wlkBucket->end(); it++) {
if(!it->alive || std::abs(it->weight) <= 1e-6) continue;
LocalTimer.start("SubStep::Propagate");
prop0->Propagate(time,*it,accept);
LocalTimer.stop("SubStep::Propagate");
}// walkers
} // iSubstep
for(WalkerHandler::WalkerIterator it=wlkBucket->begin(); it!=wlkBucket->end(); it++) {
ComplexType hamME,ovlp;
SDetOps.matrix_element_and_overlap((it->SlaterMat).data(),(it->SlaterMat).data()+2*NMO*NAEA,ovlp,hamME);
register ComplexType w = ovlp/std::abs(ovlp);
enume += w*hamME/ovlp;
edeno += w;
}
}
data[0]=enume.real();
data[1]=edeno.real();
data[2]=accept;
myComm->allreduce(data);
if(diagHam > 0 && iBlock % diagHam_freq == 0 && iBlock > 200 ) {
SDetOps.diag(wlkBucket->begin(),wlkBucket->end(),diagHam,diagEnergies,diagEigVec,exactEstimatorEnergy,&wfn0->getHF());
}
if(myComm->rank() == 0) {
out<<iBlock <<" " <<std::setprecision(6) <<accept/((iBlock-block0+1)*nSubstep*nStep*myComm->size()) <<" " <<data[0]/data[1];
if(diagHam > 0) {
for(int i=0; i<diagHam; i++) out<<" " <<diagEnergies[i];
out<<" " <<exactEstimatorEnergy.real();
}
out<<" " <<LocalTimer.average("Block::TOTAL") <<std::endl;
}
// add estimators here
// checkpoint
if(iBlock != 0 && iBlock % nCheckpoint == 0)
if(!checkpoint(iBlock,step_tot)) {
app_error()<<" Error in VMCDriver::checkpoint(). \n" <<std::endl;
return false;
}
LocalTimer.stop("Block::TOTAL");
}
app_log()<<"----------------------------------------------------------------\n";
app_log()<<" LocalTimer: \n";
LocalTimer.print_average_all(app_log());
app_log()<<" Timer: \n";
Timer.print_average_all(app_log());
app_log()<<"----------------------------------------------------------------\n";
out.close();
*/
return true;
}
// Reads the VMC driver parameters from the XML node cur.
//
// Registers the run parameters (blocks, steps, substeps, nWalkers, checkpoint,
// dt/timestep, diagHam) and the restart/checkpoint file names and tags with a
// ParameterSet and lets it parse the node. "dt" and "timestep" both map to dt.
// Returns false when cur is NULL, true otherwise.
bool VMCDriver::parse(xmlNodePtr cur)
{
  if(cur==NULL) return false;

  ParameterSet m_param;
  m_param.add(nBlock,"blocks","int");
  m_param.add(nStep,"steps","int");
  m_param.add(nSubstep,"substeps","int");
  m_param.add(nWalkers,"nWalkers","int");
  m_param.add(nCheckpoint,"checkpoint","int");
  m_param.add(dt,"dt","double");
  m_param.add(dt,"timestep","double");
  m_param.add(diagHam,"diagHam","int");
  m_param.add(hdf_read_tag,"hdf_read_tag","std::string");
  m_param.add(hdf_read_restart,"hdf_read_file","std::string");
  m_param.add(hdf_write_tag,"hdf_write_tag","std::string");
  m_param.add(hdf_write_restart,"hdf_write_file","std::string");
  m_param.put(cur);

  return true;
}
// Stores the handler pointers and prepares the walker set for a VMC run.
//
// Rank 0 tries to open the restart file (when one was given) and the outcome
// is broadcast. The walker handler then receives a reference matrix of size
// (4*NMO, NAEA) and either restores its walkers from the archive or creates
// nWalkers new ones. Hamiltonian, wavefunction and propagator initialization
// are not performed by this driver. Always returns true.
bool VMCDriver::setup(HamPtr h0, WSetPtr w0, PropPtr p0, WfnPtr wf0)
{
  ham0      = h0;
  wlkBucket = w0;
  prop0     = p0;
  wfn0      = wf0;
  restarted = false;

  app_log()<<"\n****************************************************\n"
           <<" Beginning VMC Driver initialization.\n"
           <<"****************************************************\n"
           <<std::endl;

  hdf_archive read(myComm);
  const bool try_restart = (myComm->rank() == 0) && !hdf_read_restart.empty();
  if(try_restart) {
    if(read.open(hdf_read_restart,H5F_ACC_RDONLY,false))
      restarted = restart(read);
    if(!restarted) {
      read.close();
      app_log()<<" WARNING: Problems restarting simulation. Starting from default settings. \n";
    }
  }
  myComm->bcast(restarted);

  // A VMC Walker has 2 states, so it is size (4*NMO,NAEA)
  ComplexMatrix HF;
  HF.resize(4*NMO,NAEA);
  const ComplexType unit(1.0,0.0);
  // row blocks starting at 0 and 2*NMO take NAEA diagonal entries
  for(int orb=0; orb<NAEA; orb++) {
    HF(orb,orb)       = unit;
    HF(2*NMO+orb,orb) = unit;
  }
  // row blocks starting at NMO and 3*NMO take NAEB diagonal entries
  for(int orb=0; orb<NAEB; orb++) {
    HF(NMO+orb,orb)   = unit;
    HF(3*NMO+orb,orb) = unit;
  }
  wlkBucket->setHF(HF);

  if(restarted)
    wlkBucket->restartFromHDF5(nWalkers,read,hdf_read_tag,false);
  else
    wlkBucket->initWalkers(nWalkers);

  app_log()<<"\n****************************************************\n"
           <<" Finished Driver initialization.\n"
           <<"****************************************************\n"
           <<std::endl;

  return true;
}
// Writes a checkpoint file.
//
// block, step : counters stored in the file as "DriverInts".
//
// Rank 0 creates the file. The name is hdf_write_restart when set; otherwise
// it is "<title>.s<series>.chk.h5", with a ".g<group>" part inserted before
// the series when more than one MPI group is in use. The counters are written
// under the "VMCDriver" group (nested under hdf_write_tag when non-empty) and
// the walker handler is then asked to dump its walkers into the same archive.
// Returns false when the file cannot be created or the walker dump fails.
bool VMCDriver::checkpoint(int block, int step)
{
  hdf_archive dump(myComm,false);
  if(myComm->rank() == 0) {
    std::string file;
    if(hdf_write_restart != std::string("")) {
      file = hdf_write_restart;
    } else {
      // bounded formatting: a long project title is truncated instead of
      // overrunning the buffer
      char fileroot[128];
      const bool no_gtag = (qmc_common.mpi_groups==1);
      if(no_gtag)
        snprintf(fileroot,sizeof(fileroot),"%s.s%03d",project_title.c_str(),m_series);
      else
        snprintf(fileroot,sizeof(fileroot),"%s.g%03d.s%03d",project_title.c_str(),myComm->getGroupID(),m_series);
      file = std::string(fileroot)+std::string(".chk.h5");
    }
    if(!dump.create(file)) {
      app_error()<<" Error opening checkpoint file for write. \n";
      return false;
    }
    std::vector<IndexType> Idata(2);
    Idata[0]=block;
    Idata[1]=step;
    // always write driver data and walkers
    dump.push("VMCDriver");
    if(hdf_write_tag != std::string("")) dump.push(hdf_write_tag);
    dump.write(Idata,"DriverInts");
    //dump.write(Rdata,"DriverReals");
    if(hdf_write_tag != std::string("")) dump.pop();
    dump.pop();
  }
  if(!wlkBucket->dumpToHDF5(dump,hdf_write_tag) ) {
    app_error()<<" Problems writting checkpoint file in Driver/VMCDriver::checkpoint(). \n";
    return false;
  }
  if(myComm->rank() == 0) {
    dump.close();
  }
  return true;
}
// Restart hook for the VMC driver.
// Currently a stub: nothing is read from the archive and true is returned
// unconditionally.
bool VMCDriver::restart(hdf_archive&)
{
return true;
}
// Currently a no-op: does nothing and always returns true.
bool VMCDriver::clear()
{
return true;
}
}

57
src/AFQMC/Drivers/VMCDriver.h Executable file
View File

@ -0,0 +1,57 @@
#ifndef QMCPLUSPLUS_AFQMC_VMCDRIVER_H
#define QMCPLUSPLUS_AFQMC_VMCDRIVER_H
#include<Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "AFQMC/config.h"
#include "AFQMC/Drivers/Driver.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
namespace qmcplusplus
{
// VMC driver: concrete Driver registered under the name "VMC".
// The member functions are defined in VMCDriver.cpp.
class VMCDriver: public Driver
{
  // handler pointer aliases (private to this class)
  using HamPtr  = HamiltonianBase*;
  using WfnPtr  = WavefunctionHandler*;
  using PropPtr = PropagatorBase*;
  using WSetPtr = WalkerHandlerBase*;
  using InfoPtr = AFQMCInfo*;

  public:

  VMCDriver(Communicate *c)
    : Driver(c),
      diagHam(0),
      diagHam_freq(10)
  {
    name = "VMC";
    project_title = "vmc";
  }

  ~VMCDriver() {}

  // Driver interface
  bool run();
  bool parse(xmlNodePtr);
  bool setup(HamPtr,WSetPtr,PropPtr,WfnPtr);
  bool checkpoint(int,int);
  bool restart(hdf_archive&);
  bool clear();

  protected:

  // diagHam is read from the "diagHam" input parameter; diagHam_freq keeps its default of 10
  int diagHam;
  int diagHam_freq;
};
}
#endif

View File

@ -0,0 +1,470 @@
#include<tuple>
#include<map>
#include<string>
#include<iomanip>
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include<Message/MPIObjectBase.h>
#include "Message/OpenMP.h"
#include "Message/Communicate.h"
#include "Message/CommOperators.h"
#include "OhmmsData/libxmldefs.h"
#include "Configuration.h"
#include <qmc_common.h>
#include "AFQMC/config.h"
#include "AFQMC/Drivers/selectedCI.h"
#include "AFQMC/Utilities/Utils.h"
namespace qmcplusplus {
// Iteratively builds a selected-CI expansion.
//
// Determinants are stored as flat lists of ne = NAEA+NAEB orbital indices:
// occ_orbs holds the current expansion, ci its coefficients and intm the
// candidate list built in each iteration. Starting from a single determinant,
// each of the maxit iterations
//   1. copies every current determinant into intm and adds the double
//      excitations whose |Hij*ci| exceeds cutoff_list,
//   2. diagonalizes the Hamiltonian in the space of intm,
//   3. keeps the determinants whose coefficient magnitude exceeds cutoff_diag,
//   4. writes the surviving expansion to "iterativeCI.dat".
// When diag_in_steps > 0, a final pass re-diagonalizes growing subsets of the
// expansion (largest coefficients first) and logs the energies.
// Returns false when a diagonalization fails or the output file cannot be
// opened, true otherwise.
bool selectedCI::run()
{
app_log()<<" Running selectedCI. \n";
int ne = NAEA+NAEB;
std::vector<IndexType> intm;
intm.reserve(ne*100000);
std::vector<RealType> eigVal(1);
std::vector<IndexType> vira,virb;
std::vector<IndexType> indx(ne);
vira.reserve(NMO-NAEA);
// NOTE(review): virb is later filled with NMO-NAEB entries; reserving NMO-NAEA
// looks like a copy-paste slip (capacity hint only) -- confirm.
virb.reserve(NMO-NAEA);
ComplexMatrix eigVec;
occ_orbs.reserve(ne*100000);
ci.reserve(100000);
// initial determinant: indices 0..NAEA-1 followed by NMO..NMO+NAEB-1, coefficient 1
for(int i=0; i<NAEA; i++)
occ_orbs.push_back(i);
for(int i=NMO; i<NMO+NAEB; i++)
occ_orbs.push_back(i);
ci.push_back(1.0);
// every determinant is kept sorted so that it can be compared element-wise
int nn = occ_orbs.size()/ne;
for(int i=0; i<nn; i++)
std::sort(occ_orbs.begin()+i*ne,occ_orbs.begin()+(i+1)*ne);
// ideas for speed-ups
// 1. Use the fact that H is stored in sparse ordered form to
// only consider terms that have hmat*ci > cut
// To do this, make a routine that given (i,j) it gets all
// (k,l) such that h(i,j,k,l)*ci > cut
// 2. Keep a different list for those determinants added in the last
// step (those new to the list that haven't been "excited" before)
// This is a reasonable approximation since the ci coeffs do not change
// much. Then only excite from this list.
// 3. For parallelization, we need to solve both hamiltonian storage (which will be bigger now)
// and fair share of work. For now, assume hamiltonian is
// 4. use (real) parallel? sparse eigenvalue solver that produces only a given # of states
for(int i=0; i<maxit; i++) {
intm.clear();
int nci = occ_orbs.size()/ne;
int nterms=0;
Timer.reset("Generic");
Timer.start("Generic");
std::vector<IndexType>::iterator it=occ_orbs.begin();
for(int nc = 0; nc<nci; nc++, it+=ne) {
// indices in [0,NMO) missing from the first NAEA entries go to vira,
// indices in [NMO,2*NMO) missing from the remaining entries go to virb
vira.clear();
virb.clear();
for(int iv=0; iv<NMO; iv++)
if(!binary_search (it, it+NAEA, iv)) vira.push_back(iv);
for(int iv=NMO; iv<2*NMO; iv++)
if(!binary_search (it+NAEA, it+ne, iv)) virb.push_back(iv);
// the determinant itself is added to the candidate list if not yet present
// NOTE(review): std::search looks for the ne indices at ANY offset in intm,
// not only at multiples of ne, so a match straddling two stored determinants
// would also count as "already present" -- confirm whether that can occur.
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
if(std::search(intm.begin(),intm.end(),it,it+ne)==intm.end()) {
for(int ii=0; ii<ne; ii++) intm.push_back(*(it+ii));
nterms++;
}
// double excitations
for(int ia=0; ia<NAEA; ia++) {
int a = *(it+ia);
// aa
for(int ib=ia+1; ib<NAEA; ib++) {
int b = *(it+ib);
for(int ic=0; ic<NMO-NAEA; ic++) {
int c = vira[ic];
for(int id=ic+1; id<NMO-NAEA; id++) {
int d = vira[id];
ValueType Hij = sHam->H(a,b,c,d) - sHam->H(a,b,d,c);
if(std::abs(Hij*ci[nc]) > cutoff_list) {
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
// sz is assigned but not used afterwards
int sz = intm.size();
indx.clear();
for(int ii=0; ii<ne; ii++) indx.push_back(*(it+ii));
indx[ia] = c;
indx[ib] = d;
std::sort(indx.begin(),indx.end());
if(std::search(intm.begin(),intm.end(),indx.begin(),indx.end())==intm.end()) {
for(int ii=0; ii<ne; ii++) intm.push_back(indx[ii]);
nterms++;
}
// second assignment (c and d swapped): after sorting it yields the same
// index set as the one just handled
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
sz = intm.size();
indx.clear();
for(int ii=0; ii<ne; ii++) indx.push_back(*(it+ii));
indx[ia] = d;
indx[ib] = c;
std::sort(indx.begin(),indx.end());
if(std::search(intm.begin(),intm.end(),indx.begin(),indx.end())==intm.end()) {
for(int ii=0; ii<ne; ii++) intm.push_back(indx[ii]);
nterms++;
}
}
}
}
}
// ab
for(int ib=NAEA; ib<ne; ib++) {
int b = *(it+ib);
for(int ic=0; ic<NMO-NAEA; ic++) {
int c = vira[ic];
for(int id=0; id<NMO-NAEB; id++) {
int d = virb[id];
ValueType Hij = sHam->H(a,b,c,d);
if(std::abs(Hij*ci[nc]) > cutoff_list) {
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
int sz = intm.size();
indx.clear();
for(int ii=0; ii<ne; ii++) indx.push_back(*(it+ii));
indx[ia] = c;
indx[ib] = d;
std::sort(indx.begin(),indx.end());
if(std::search(intm.begin(),intm.end(),indx.begin(),indx.end())==intm.end()) {
for(int ii=0; ii<ne; ii++) intm.push_back(indx[ii]);
nterms++;
}
}
}
}
}
}
// excitations of two entries from the second part of the determinant into virb
for(int ia=NAEA; ia<ne; ia++) {
int a = *(it+ia);
for(int ib=ia+1; ib<ne; ib++) {
int b = *(it+ib);
for(int ic=0; ic<NMO-NAEB; ic++) {
int c = virb[ic];
for(int id=ic+1; id<NMO-NAEB; id++) {
int d = virb[id];
ValueType Hij = sHam->H(a,b,c,d) - sHam->H(a,b,d,c);
if(std::abs(Hij*ci[nc]) > cutoff_list) {
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
int sz = intm.size();
indx.clear();
for(int ii=0; ii<ne; ii++) indx.push_back(*(it+ii));
indx[ia] = c;
indx[ib] = d;
std::sort(indx.begin(),indx.end());
if(std::search(intm.begin(),intm.end(),indx.begin(),indx.end())==intm.end()) {
for(int ii=0; ii<ne; ii++) intm.push_back(indx[ii]);
nterms++;
}
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
sz = intm.size();
indx.clear();
for(int ii=0; ii<ne; ii++) indx.push_back(*(it+ii));
indx[ia] = d;
indx[ib] = c;
std::sort(indx.begin(),indx.end());
if(std::search(intm.begin(),intm.end(),indx.begin(),indx.end())==intm.end()) {
for(int ii=0; ii<ne; ii++) intm.push_back(indx[ii]);
nterms++;
}
}
}
}
}
}
} // states in occ_orbs
Timer.stop("Generic");
app_log()<<" Iteration: " <<i <<std::endl;
app_log()<<" Intermediate list has " <<nterms <<" terms" <<std::endl;
Timer.reset("Generic1");
Timer.start("Generic1");
// diagonalize in the space of the candidate determinants
bool sucess = diagonalizeTrialWavefunction(eigVal,eigVec,intm,nterms);
if(!sucess) {
app_error()<<" Error: Problems with diagonalizeTrialWavefunction. \n";
return false;
}
app_log()<<" Time to generate hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic3") <<std::endl;
app_log()<<" Time to diagonalize hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic4") <<std::endl;
// truncation: keep only candidates whose coefficient magnitude exceeds cutoff_diag
occ_orbs.reserve(intm.size());
occ_orbs.clear();
ci.clear();
ci.reserve(nterms);
for(int ii=0,nt=0; ii<nterms; ii++) {
//app_log()<<"ci " <<ii <<" " <<eigVec(0,ii) <<std::endl;
if(std::abs(eigVec(0,ii)) > cutoff_diag) {
ci.push_back(eigVec(0,ii));
for(int j=0; j<ne; j++)
occ_orbs.push_back(intm[ii*ne+j]);
nt++;
}
}
Timer.stop("Generic1");
// NOTE(review): the file name is hard-coded and the file is rewritten in every
// iteration; the output_filename parameter read in parse() is not used here.
std::ofstream out("iterativeCI.dat");
if(out.fail()) {
app_error()<<" Problems opening iterativeCI.dat \n";
return false;
}
{
// write the determinants ordered by decreasing |ci|; orbital indices are written 1-based
// (the loop variable i below shadows the iteration counter of the enclosing loop)
std::vector<std::tuple<double,int> > dets(ci.size());
for(int i=0; i<ci.size(); i++) dets[i] = std::make_tuple( std::abs(ci[i]),i);
std::sort( dets.begin(), dets.end(),
[] (const std::tuple<double,int>& a, const std::tuple<double,int>& b)
{return (std::get<0>(a)>std::get<0>(b));} );
out<<" &FCI \n NCI = " <<ci.size() <<" \n /\n";
for(int i=0; i<ci.size(); i++) {
out<<ci[std::get<1>(dets[i])] <<" ";
for(int j=0; j<ne; j++) out<<occ_orbs[ std::get<1>(dets[i])*ne+j]+1 <<" ";
out<<"\n";
}
out.close();
}
app_log()<<" Energy: " <<eigVal[0]+NuclearCoulombEnergy <<std::endl;
app_log()<<" Number of determinants after truncation: " <<occ_orbs.size()/ne <<std::endl;
app_log()<<" Timings: " <<Timer.total("Generic") <<" " <<Timer.total("Generic1") <<std::endl;
} // iteration
// optional convergence scan: diagonalize the i determinants with the largest |ci|
// for i = 1, 1+diag_in_steps, ... and log the resulting energy
if(diag_in_steps>0) {
app_log()<<"\n***********************************************\n #Determinants Energy: " <<"\n";
std::vector<std::tuple<double,int> > dets(ci.size());
for(int i=0; i<ci.size(); i++) dets[i] = std::make_tuple( std::abs(ci[i]),i);
std::sort( dets.begin(), dets.end(),
[] (const std::tuple<double,int>& a, const std::tuple<double,int>& b)
{return (std::get<0>(a)>std::get<0>(b));} );
for(int i=1; i<ci.size(); i+=diag_in_steps) {
intm.clear();
for(int ki=0; ki<i; ki++) {
int kk = std::get<1>(dets[ki]);
for(int kj=0; kj<ne; kj++) intm.push_back(occ_orbs[ kk*ne+kj]);
}
bool sucess = diagonalizeTrialWavefunction(eigVal,eigVec,intm,i,false);
if(!sucess) {
app_error()<<" Error: Problems with diagonalizeTrialWavefunction. \n";
return false;
}
app_log()<<i <<" " <<eigVal[0]+NuclearCoulombEnergy <<std::endl;
}
app_log()<<"***********************************************" <<std::endl <<std::endl;
}
return true;
}
bool selectedCI::diagonalizeTrialWavefunction(std::vector<RealType>& eigVal, ComplexMatrix& eigVec, std::vector<IndexType>& occv, int nci, bool eigV )
{
ComplexType one = ComplexType(1.0,0.0);
ComplexType zero = ComplexType(0.0,0.0);
bool sucess;
for(int i=0; i<nci; i++)
std::sort(occv.begin()+i*(NAEA+NAEB),occv.begin()+(i+1)*(NAEA+NAEB));
if(myComm->rank()==0) {
Timer.reset("Generic3");
Timer.start("Generic3");
ComplexMatrix hm(nci,nci);
ComplexMatrix ov(nci,nci);
ComplexType let;
RealType sg;
std::vector<IndexType> occ(NAEA+NAEB);
IndexType n0,n1,n2,n3;
std::vector<IndexType> DL(NAEA+NAEB);
std::vector<IndexType> DR(NAEA+NAEB);
// don't rely on H2_2bar in case it is removed
for(int ki=0; ki<nci; ki++) {
// i==j
let=zero;
std::vector<IndexType>::iterator it = occv.begin()+ki*(NAEA+NAEB);
for(int i=0; i<NAEA+NAEB; i++)
{
let += sHam->H(*(it+i),*(it+i));
for(int j=i+1; j<NAEA+NAEB; j++) {
let += sHam->H(*(it+i),*(it+j),*(it+i),*(it+j)) - sHam->H(*(it+i),*(it+j),*(it+j),*(it+i));
}
}
ov(ki,ki) = one;
hm(ki,ki) = let;
for(int kj=ki+1; kj<nci; kj++) {
std::vector<IndexType>::iterator jt = occv.begin()+kj*(NAEA+NAEB);
std::copy(it,it+NAEA+NAEB,DL.begin());
std::copy(jt,jt+NAEA+NAEB,DR.begin());
int cnt = cntExcitations(NAEA,NAEB,DL,DR,n0,n1,n2,n3,occ,sg);
if(cnt==0) {
app_error()<<" Error: Found repeated determinant in trial wave function in MultiPureSingleDeterminant \n";
return false;
} else if(cnt==2) {
int nterms = NAEA+NAEB-1;
let=sHam->H(n0,n1);
for(int i=0; i<nterms; i++)
let+=sHam->H(n0,occ[i],n1,occ[i]) - sHam->H(n0,occ[i],occ[i],n1);
hm(ki,kj)=let*sg;
} else if(cnt==4) {
hm(ki,kj) = sg*(sHam->H(n0,n1,n2,n3) - sHam->H(n0,n1,n3,n2));
} else {
hm(ki,kj) = zero;
}
ov(ki,kj) = ov(kj,ki) = zero;
hm(kj,ki) = myconj(hm(ki,kj));
}
}
Timer.stop("Generic3");
//app_log()<<" Time to generate hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic2") <<std::endl;
Timer.reset("Generic4");
Timer.start("Generic4");
eigVal.resize(1);
eigVec.resize(1,nci);
std::vector<int> ifail(nci);
sucess = DenseMatrixOperators::genHermitianEigenSysSelect(nci,hm.data(),nci,ov.data(),nci,1,eigVal.data(),eigV,eigVec.data(),eigVec.size2(),ifail.data());
Timer.stop("Generic4");
//app_log()<<" Time to diagonalize hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic2") <<std::endl;
} else {
eigVal.resize(1);
eigVec.resize(1,nci);
}
myComm->bcast(sucess);
myComm->bcast(eigVal.data(),eigVal.size(),0,myComm->getMPI());
myComm->bcast(eigVec.data(),eigVec.size1()*eigVec.size2(),0,myComm->getMPI());
return sucess;
}
// Reads the selectedCI parameters from the XML node cur.
//
// maxit, cutoff_list and cutoff_diag are reset to 0 and build_full_hamiltonian
// to true before parsing. "output_filename" and "output" both map to
// output_filename; "full_hamiltonian" set to "no" or "false" (in any letter
// case) switches build_full_hamiltonian off.
// Returns false when cur is NULL, true otherwise.
bool selectedCI::parse(xmlNodePtr cur)
{
  if(cur == NULL)
    return false;

  maxit=0;
  cutoff_list=cutoff_diag=0;
  build_full_hamiltonian = true;

  std::string str("yes");
  ParameterSet m_param;
  m_param.add(str,"full_hamiltonian","std::string");
  m_param.add(output_filename,"output_filename","std::string");
  m_param.add(output_filename,"output","std::string");
  m_param.add(cutoff_list,"cutoff_list","double");
  m_param.add(cutoff_diag,"cutoff_diag","double");
  m_param.add(maxit,"maxit","int");
  m_param.add(diag_in_steps,"diag_steps","int");
  m_param.put(cur);

  std::transform(str.begin(),str.end(),str.begin(),(int (*)(int)) tolower);
  if(str == "no" || str == "false") build_full_hamiltonian = false;

  return true;
}
// Prepares the selectedCI driver.
//
// Only the Hamiltonian h0 is used: the walker, propagator and wavefunction
// pointers kept by the driver are set to NULL regardless of w0, p0 and wf0.
// The Hamiltonian must be a SparseGeneralHamiltonian, otherwise the run is
// aborted. Returns false when h0 is NULL or its initialization fails.
bool selectedCI::setup(HamPtr h0, WSetPtr w0, PropPtr p0, WfnPtr wf0)
{
  if(h0==NULL) {
    app_error()<<" Error: Null Hamiltonian pointer in selectedCI::setup(). \n";
    return false;
  }

  ham0      = h0;
  wlkBucket = NULL;
  prop0     = NULL;
  wfn0      = NULL;

  sHam = dynamic_cast<SparseGeneralHamiltonian*>(ham0);
  if(sHam == NULL) {
    app_error()<<" Error in MultiPureSingleDeterminant::getHamiltonian. \n"
               <<" Hamiltonian associated with MultiPureSingleDeterminant must of the \n"
               <<" type SparseGeneralHamiltonian. \n";
    APP_ABORT("");
  }

  // separator line shared by the log banners below
  const char* const rule = "****************************************************\n";

  app_log()<<"\n" <<rule <<rule <<rule
           <<" Beginning Driver initialization.\n"
           <<rule <<rule <<rule
           <<std::endl;

  app_log()<<" Using " <<ncores_per_TG <<" cores per node in a TaskGroup. \n";

  // right now this TG is not used. It is needed for setup purposes and to
  // get a unique TG number for every group of cores on a node (used in the WalkerSet)
  TG.setup(ncores_per_TG,1,false);
  std::vector<int> TGdata(5);
  TG.getSetupInfo(TGdata);

  const bool first_core = (TG.getCoreRank()==0);
  const std::string buffer_name = std::string("COMMBuffer_")+std::to_string(myComm->rank());
  CommBuffer.setup(first_core,buffer_name,ncores_per_TG);
  TG.setBuffer(&CommBuffer);

  app_log()<<"\n" <<rule
           <<" Initializating Hamiltonian \n"
           <<rule
           <<std::endl;

  // hamiltonian
  if(!ham0->init(TGdata,&CommBuffer)) {
    app_error()<<"Error initializing Hamiltonian in selectedCI::setup" <<std::endl;
    return false;
  }

  NuclearCoulombEnergy = static_cast<ValueType>(sHam->NuclearCoulombEnergy);

  app_log()<<"\n" <<rule <<rule <<rule
           <<" Finished Driver initialization.\n"
           <<rule <<rule <<rule
           <<std::endl;

  return true;
}
// Checkpoint writer. Currently a stub: nothing is written, both arguments
// are ignored and true is always returned.
bool selectedCI::checkpoint(int /*block*/, int /*step*/)
{
  return true;
}
// sets up restart archive and reads
bool selectedCI::restart(hdf_archive& read)
{
return true;
}
// Currently a stub: no state is released and true is always returned.
bool selectedCI::clear()
{
  return true;
}
}

666
src/AFQMC/Drivers/selectedCI.cpp Executable file
View File

@ -0,0 +1,666 @@
#include<tuple>
#include<map>
#include<string>
#include<iomanip>
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include<Message/MPIObjectBase.h>
#include "Message/OpenMP.h"
#include "Message/Communicate.h"
#include "Message/CommOperators.h"
#include "OhmmsData/libxmldefs.h"
#include "Configuration.h"
#include <qmc_common.h>
#include "AFQMC/config.h"
#include "AFQMC/Drivers/selectedCI.h"
#include "AFQMC/Utilities/Utils.h"
namespace qmcplusplus {
bool selectedCI::run()
{
app_log()<<" Running selectedCI. \n";
int ne = NAEA+NAEB;
std::vector<IndexType> intm;
std::vector<IndexType> new_dets;
std::vector<ValueType> new_ci;
intm.reserve(ne*100000);
std::vector<RealType> eigVal(1);
std::vector<IndexType> vira,virb;
std::vector<IndexType> indx(ne);
std::vector<OrbitalType> KLs;
vira.reserve(NMO-NAEA);
virb.reserve(NMO-NAEA);
ValueMatrix eigVec;
// generate excitation tables and hamiltonian
sHam->generate_selCI_Ham(cutoff_list);
occ_orbs.clear();
ci.clear();
occ_orbs.reserve(ne*100000);
ci.reserve(100000);
new_dets.reserve(ne*100000);
new_ci.reserve(100000);
// occ_orbs: determinants in the permanent list that have already been excited from
// new_dets: determinants generated in the previous iteration, from which excitations are generated
// in the current iteration
for(int i=0; i<NAEA; i++)
new_dets.push_back(i);
for(int i=NMO; i<NMO+NAEB; i++)
new_dets.push_back(i);
new_ci.push_back(1.0);
// ideas for speed-ups
// 1. Use the fact that H is stored in sparse ordered form to
// only consider terms that have hmat*ci > cut
// To do this, make a routine taht given (i,j) it gets all
// (k,l) such that h(i,j,k,l)*ci > cut
// 2. Keep a different list for those determinants added in the last
// step (those new to the list that haven't been "excited" before)
// This is a reasonable approximation since the ci coeffs do not change
// much. Then only excite from this list.
// 3. For parallelization, we need to solve both hamiltonian storage (which will be bigger now)
// and fare share of work. For now, assume hamiltonian is
// 4. use (real) parallel? sparse eigenvalue solver that produces only a given # of states
for(int ite=0; ite<maxit; ite++) {
app_log()<<" Iteration: " <<ite <<std::endl;
intm.clear();
int nci = new_dets.size()/ne;
int nterms=0;
Timer.reset("Generic");
Timer.start("Generic");
std::vector<IndexType>::iterator it=new_dets.begin();
if((occ_orbs.size()+new_dets.size()) > occ_orbs.capacity()) occ_orbs.reserve( occ_orbs.size()+new_dets.size() + 10000*ne );
if((ci.size()+new_ci.size()) > ci.capacity()) ci.reserve( ci.size()+new_ci.size() + 10000 );
for(int nc = 0; nc<nci; nc++, it+=ne) {
vira.clear();
virb.clear();
for(int iv=0; iv<NMO; iv++)
if(!binary_search (it, it+NAEA, iv)) vira.push_back(iv);
for(int iv=NMO; iv<2*NMO; iv++)
if(!binary_search (it+NAEA, it+ne, iv)) virb.push_back(iv);
if(nterms*ne == intm.capacity()) intm.reserve(ne*(nterms+10000));
// add new_dets[nc] to occ_orbs
ci.push_back(new_ci[nc]);
for(int ii=0; ii<ne; ii++)
occ_orbs.push_back(*(it+ii));
double cut = cutoff_list/std::abs(new_ci[nc]);
// double excitations
for(int ia=0; ia<NAEA; ia++) {
OrbitalType a = *(it+ia);
// aa
for(int ib=ia+1; ib<NAEA; ib++) {
OrbitalType b = *(it+ib);
sHam->get_selCI_excitations(a,b,0,cut,&(*it),KLs);
if((nterms+KLs.size())*ne >= intm.capacity()) intm.reserve(ne*(nterms+KLs.size()+10000));
for(std::vector<OrbitalType>::iterator itkl=KLs.begin(); itkl<KLs.end(); itkl+=2) {
for(int ii=0; ii<ne; ii++) indx[ii] = *(it+ii);
indx[ia] = *(itkl);
indx[ib] = *(itkl+1);
std::sort(indx.begin(),indx.end());
intm.insert(intm.end(),std::begin(indx),std::end(indx));
nterms++;
}
}
// ab
for(int ib=NAEA; ib<ne; ib++) {
OrbitalType b = *(it+ib);
sHam->get_selCI_excitations(a,b,1,cut,&(*it),KLs);
if((nterms+KLs.size())*ne >= intm.capacity()) intm.reserve(ne*(nterms+KLs.size()+10000));
for(std::vector<OrbitalType>::iterator itkl=KLs.begin(); itkl<KLs.end(); itkl+=2) {
for(int ii=0; ii<ne; ii++) indx[ii] = *(it+ii);
indx[ia] = *(itkl);
indx[ib] = *(itkl+1);
std::sort(indx.begin(),indx.end());
intm.insert(intm.end(),std::begin(indx),std::end(indx));
nterms++;
}
}
}
for(int ia=NAEA; ia<ne; ia++) {
int a = *(it+ia);
for(int ib=ia+1; ib<ne; ib++) {
int b = *(it+ib);
sHam->get_selCI_excitations(a,b,3,cut,&(*it),KLs);
if((nterms+KLs.size())*ne >= intm.capacity()) intm.reserve(ne*(nterms+KLs.size()+10000));
for(std::vector<OrbitalType>::iterator itkl=KLs.begin(); itkl<KLs.end(); itkl+=2) {
for(int ii=0; ii<ne; ii++) indx[ii] = *(it+ii);
indx[ia] = *(itkl);
indx[ib] = *(itkl+1);
std::sort(indx.begin(),indx.end());
intm.insert(intm.end(),std::begin(indx),std::end(indx));
nterms++;
}
}
}
} // states in new_dets
Timer.stop("Generic");
app_log()<<"Time to generate excitations: " <<Timer.total("Generic") <<std::endl;
if(occ_orbs.size() == 0) {
app_error()<<" Error in selectedCI::run(): Main determinant list is empty. \n";
APP_ABORT(" Error in selectedCI::run(): Main determinant list is empty. \n");
}
if(ci.size() == 0) {
app_error()<<" Error in selectedCI::run(): Main ci determinant list is empty. \n";
APP_ABORT(" Error in selectedCI::run(): Main ci determinant list is empty. \n");
}
if(ci.size() != occ_orbs.size()/ne) {
app_error()<<" Error in selectedCI::run(): Main determinant list size is inconsistent, ci, dets: " <<ci.size() <<" " <<occ_orbs.size() <<" \n";
APP_ABORT(" Error in selectedCI::run(): Main determinant list size is inconsistent. \n");
}
Timer.reset("Generic");
Timer.start("Generic");
// sort occ_orbs/ci
new_dets.clear();
new_ci.clear();
if(ci.size() > 1)
sort_multiple(occ_orbs.begin(),occ_orbs.end()-ne,ci.begin(),ci.end()-1);
/*
//debug
int ntr = occ_orbs.size()/ne;
for(int i=0; i<ntr; i++)
for(int k=i+1; k<ntr; k++)
if(list_order(occ_orbs.begin()+k*ne,occ_orbs.begin()+i*ne)) {
app_error()<<" Error with order of occ_orbs. \n";
return false;
}
*/
// clean intm list
app_log()<<" Intermediate list has " <<nterms <<" new terms (before cleanup)" <<std::endl;
if(intm.size() > 0) {
val_at_pivot.resize(NAEA+NAEB);
sort_list(intm.begin(),intm.end()-ne);
/*
//debug
ntr = intm.size()/ne;
for(int i=0; i<ntr; i++)
for(int k=i+1; k<ntr; k++)
if(list_order(intm.begin()+k*ne,intm.begin()+i*ne)) {
app_error()<<" Error with order of intm. \n";
return false;
}
*/
remove_repeated(intm,occ_orbs);
/*
//debug
int ntr1 = occ_orbs.size()/ne;
ntr = intm.size()/ne;
for(int i=0; i<ntr1; i++)
for(int k=0; k<ntr; k++)
if(list_equal(occ_orbs.begin()+i*ne,intm.begin()+k*ne)) {
app_error()<<" Error: Repeated elements after call to remove_repeated. \n";
return false;
}
*/
if(intm.size()%ne != 0) APP_ABORT("Error: After remove_repeated. \n\n\n");
nterms = intm.size()/ne;
} else {
nterms=0;
}
Timer.stop("Generic");
app_log()<<"Time to sort and clean new list of dets: " <<Timer.total("Generic") <<std::endl;
app_log()<<" Intermediate list has " <<nterms <<" new terms" <<std::endl;
if(nterms == 0) {
app_log()<<" Intermediate determinant list is empty. Stopping iterations. \n";
break;
}
bool sucess = diagonalizeTrialWavefunction(eigVal,eigVec,occ_orbs,occ_orbs.size()/ne,intm,intm.size()/ne,true);
if(!sucess) {
app_error()<<" Error: Problems with diagonalizeTrialWavefunction. \n";
return false;
}
app_log()<<" Time to generate hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic3") <<std::endl;
app_log()<<" Time to diagonalize hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic4") <<std::endl;
int cnt=0;
nci = ci.size();
RealType normlz = 0;
for(int ii=0; ii<nci; ii++)
normlz += mynorm(eigVec(0,ii));
for(int ii=0; ii<nterms; ii++)
if(std::abs(eigVec(0,ii+nci)) > cutoff_diag) {
cnt++;
normlz += mynorm(eigVec(0,ii+nci));
}
normlz = std::sqrt(normlz);
app_log()<<" Normalization of ci vector: " <<normlz <<std::endl;
new_dets.reserve(cnt);
new_ci.reserve(cnt);
for(int ii=0; ii<nci; ii++)
ci[ii] = eigVec(0,ii)/normlz;
for(int ii=0; ii<nterms; ii++) {
if(std::abs(eigVec(0,ii+nci)) > cutoff_diag) {
new_ci.push_back(eigVec(0,ii+nci)/normlz);
for(int j=0; j<ne; j++)
new_dets.push_back(intm[ii*ne+j]);
}
}
std::ofstream out("iterativeCI.dat");
if(out.fail()) {
app_error()<<" Problems opening iterativeCI.dat \n";
return false;
}
{
std::vector<std::tuple<double,int> > dets;
dets.reserve(ci.size()+new_ci.size());
for(int i=0; i<ci.size(); i++) dets.push_back(std::make_tuple( std::abs(ci[i]),i+1));
for(int i=0; i<new_ci.size(); i++) dets.push_back(std::make_tuple( std::abs(new_ci[i]),-(i+1)));
std::sort( dets.begin(), dets.end(),
[] (const std::tuple<double,int>& a, const std::tuple<double,int>& b)
{return (std::get<0>(a)>std::get<0>(b));} );
out<<" &FCI \n NCI = " <<dets.size() <<" \n /\n";
for(int i=0; i<dets.size(); i++) {
if(std::get<1>(dets[i]) > 0) {
out<<ci[std::get<1>(dets[i])-1] <<" ";
int nt = (std::get<1>(dets[i])-1)*ne;
for(int j=0; j<ne; j++) out<<occ_orbs[nt+j]+1 <<" ";
out<<"\n";
} else {
int nt = -std::get<1>(dets[i])-1;
out<<new_ci[nt] <<" ";
for(int j=0; j<ne; j++) out<<new_dets[nt*ne+j]+1 <<" ";
out<<"\n";
}
}
out.close();
}
app_log()<<" Energy: " <<eigVal[0]+NuclearCoulombEnergy <<std::endl;
app_log()<<" Number of determinants after truncation: " <<(occ_orbs.size()+new_dets.size())/ne <<std::endl;
} // iteration
if(diag_in_steps>0) {
if((occ_orbs.size()+new_dets.size()) > occ_orbs.capacity()) occ_orbs.reserve( occ_orbs.size()+new_dets.size() );
if((ci.size()+new_ci.size()) > ci.capacity()) ci.reserve( ci.size()+new_ci.size() );
for(int nc=0; nc<new_ci.size(); nc++) {
ci.push_back(new_ci[nc]);
for(int ii=0; ii<ne; ii++)
occ_orbs.push_back(*(new_dets.begin()+nc*ne+ii));
}
new_dets.clear();
new_ci.clear();
sort_multiple(occ_orbs.begin(),occ_orbs.end()-ne,ci.begin(),ci.end()-1);
app_log()<<"\n***********************************************\n #Determinants Energy: " <<"\n";
std::vector<std::tuple<double,int> > dets(ci.size());
for(int i=0; i<ci.size(); i++) dets[i] = std::make_tuple( std::abs(ci[i]),i);
std::sort( dets.begin(), dets.end(),
[] (const std::tuple<double,int>& a, const std::tuple<double,int>& b)
{return (std::get<0>(a)>std::get<0>(b));} );
for(int i=1; i<ci.size(); i+=diag_in_steps) {
intm.clear();
for(int ki=0; ki<i; ki++) {
int kk = std::get<1>(dets[ki]);
for(int kj=0; kj<ne; kj++) intm.push_back(occ_orbs[ kk*ne+kj]);
}
bool sucess = diagonalizeTrialWavefunction(eigVal,eigVec,intm,i,new_dets,0,false);
if(!sucess) {
app_error()<<" Error: Problems with diagonalizeTrialWavefunction. \n";
return false;
}
app_log()<<i <<" " <<eigVal[0]+NuclearCoulombEnergy <<std::endl;
}
app_log()<<"***********************************************" <<std::endl <<std::endl;
}
return true;
}
void selectedCI::sort_list(std::vector<IndexType>::iterator left, std::vector<IndexType>::iterator right)
{
std::vector<IndexType>::iterator i = left, j = right;
std::vector<IndexType>::iterator pivot = left;
std::advance(pivot, (std::distance(left,right)/(NAEA+NAEB))/2*(NAEA+NAEB));
std::copy(pivot,pivot+NAEA+NAEB,val_at_pivot.begin());
pivot = val_at_pivot.begin();
/* partition */
while (i <= j) {
while (list_order(i,pivot))
i+=(NAEA+NAEB);
while (list_order(pivot,j))
j-=(NAEA+NAEB);
if(i <= j) {
for(int k=0; k<NAEA+NAEB; k++) std::swap( *(i+k), *(j+k) );
i+=(NAEA+NAEB);
j-=(NAEA+NAEB);
}
};
/* recursion */
if (left < j)
sort_list(left, j);
if (i < right)
sort_list(i, right);
}
void selectedCI::sort_multiple(std::vector<IndexType>::iterator left, std::vector<IndexType>::iterator right, std::vector<ValueType>::iterator vl, std::vector<ValueType>::iterator vr)
{
if(left==right) return;
std::vector<IndexType>::iterator i = left, j = right;
std::vector<ValueType>::iterator vi = vl, vj = vr;
std::vector<IndexType>::iterator pivot = left;
std::advance(pivot, (std::distance(left,right)/(NAEA+NAEB))/2*(NAEA+NAEB));
std::copy(pivot,pivot+NAEA+NAEB,val_at_pivot.begin());
pivot = val_at_pivot.begin();
/* partition */
while (i <= j) {
while (list_order(i,pivot)) {
i+=(NAEA+NAEB);
vi++;
}
while (list_order(pivot,j)) {
j-=(NAEA+NAEB);
vj--;
}
if(i <= j) {
for(int k=0; k<NAEA+NAEB; k++) std::swap( *(i+k), *(j+k) );
std::swap(*vi,*vj);
i+=(NAEA+NAEB);
j-=(NAEA+NAEB);
vi++;
vj--;
}
};
/* recursion */
if (left < j)
sort_multiple(left, j, vl, vj);
if (i < right)
sort_multiple(i, right, vi, vr);
}
// 1. remove repeated from vnew
// 2. remove from vnew intersection with vold
void selectedCI::remove_repeated(std::vector<IndexType>& vnew, std::vector<IndexType>& vold)
{
std::vector<IndexType>::iterator first = vnew.begin();
std::vector<IndexType>::iterator last = vnew.end();
std::vector<IndexType>::iterator new_last = last;
if (first!=last) {
new_last=first;
first+=(NAEA+NAEB);
while (first < last)
{
if (!list_equal(new_last,first)) {
new_last+=(NAEA+NAEB);
for(int i=0; i<NAEA+NAEB; i++) *(new_last+i) = *(first++);
} else
first+=(NAEA+NAEB);
}
new_last+=(NAEA+NAEB);
}
// cut part of the vector with repeated elements
vnew.resize( std::distance(vnew.begin(),new_last) );
// now new_last points to the end of the good segment of the list
// remove intersection with vold
if(vnew.size() == 0 || vold.size()==0) return;
std::vector<IndexType>::iterator vold_first = vold.begin();
first = vnew.begin();
last = vnew.end();
new_last = first;
// loop through vnew, when a good (not found in vold) element is found, copy to new_last
while (first < last)
{
if (!mysearch(vold_first,vold.end(),first)) {
for(int i=0; i<NAEA+NAEB; i++) *(new_last++) = *(first++);
} else
first+=(NAEA+NAEB);
}
// cut part of the vector with repeated elements
vnew.resize( std::distance(vnew.begin(),new_last) );
}
// right now I'm building the Hamiltonian from scratch
// Later on store it and and rotate it according to changes in the ordering in occ_orbs
bool selectedCI::diagonalizeTrialWavefunction(std::vector<RealType>& eigVal, ValueMatrix& eigVec, std::vector<IndexType>& occ1, int nci1, std::vector<IndexType>& occ2, int nci2, bool eigV )
{
ValueType one = ValueType(1.0);
ValueType zero = ValueType(0.0);
bool sucess;
for(int i=0; i<nci1; i++)
std::sort(occ1.begin()+i*(NAEA+NAEB),occ1.begin()+(i+1)*(NAEA+NAEB));
for(int i=0; i<nci2; i++)
std::sort(occ2.begin()+i*(NAEA+NAEB),occ2.begin()+(i+1)*(NAEA+NAEB));
int nci = nci1+nci2;
if(myComm->rank()==0) {
Timer.reset("Generic3");
Timer.start("Generic3");
ValueMatrix hm(nci,nci);
ValueType let;
RealType sg;
std::vector<IndexType> occ(NAEA+NAEB);
IndexType n0,n1,n2,n3;
std::vector<IndexType> DL(NAEA+NAEB);
std::vector<IndexType> DR(NAEA+NAEB);
for(int ki=0; ki<nci; ki++) {
// i==j
let=zero;
std::vector<IndexType>::iterator it;
if(ki < nci1)
it = occ1.begin()+ki*(NAEA+NAEB);
else
it = occ2.begin()+(ki-nci1)*(NAEA+NAEB);
for(int i=0; i<NAEA+NAEB; i++)
{
let += sHam->H(*(it+i),*(it+i));
for(int j=i+1; j<NAEA+NAEB; j++) {
let += sHam->H(*(it+i),*(it+j),*(it+i),*(it+j)) - sHam->H(*(it+i),*(it+j),*(it+j),*(it+i));
}
}
hm(ki,ki) = let;
for(int kj=ki+1; kj<nci; kj++) {
std::vector<IndexType>::iterator jt;
if(kj < nci1)
jt = occ1.begin()+kj*(NAEA+NAEB);
else
jt = occ2.begin()+(kj-nci1)*(NAEA+NAEB);
std::copy(it,it+NAEA+NAEB,DL.begin());
std::copy(jt,jt+NAEA+NAEB,DR.begin());
int cnt = cntExcitations(NAEA,NAEB,DL,DR,n0,n1,n2,n3,occ,sg);
if(cnt==0) {
app_error()<<" Error: Found repeated determinant in trial wave function in MultiPureSingleDeterminant \n";
return false;
} else if(cnt==2) {
int nterms = NAEA+NAEB-1;
let=sHam->H(n0,n1);
for(int i=0; i<nterms; i++)
let+=sHam->H(n0,occ[i],n1,occ[i]) - sHam->H(n0,occ[i],occ[i],n1);
hm(ki,kj)=let*sg;
} else if(cnt==4) {
hm(ki,kj) = sg*(sHam->H(n0,n1,n2,n3) - sHam->H(n0,n1,n3,n2));
} else {
hm(ki,kj) = zero;
}
hm(kj,ki) = myconj(hm(ki,kj));
}
}
Timer.stop("Generic3");
//app_log()<<" Time to generate hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic2") <<std::endl;
Timer.reset("Generic4");
Timer.start("Generic4");
eigVal.resize(1);
if(eigV) eigVec.resize(1,nci);
sucess = DenseMatrixOperators::symEigenSysSelect(nci,hm.data(),nci,1,eigVal.data(),eigV,eigVec.data(),eigVec.size2());
Timer.stop("Generic4");
//app_log()<<" Time to diagonalize hamiltonian in diagonalizeTrialWavefunction: " <<Timer.total("Generic2") <<std::endl;
} else {
eigVal.resize(1);
if(eigV) eigVec.resize(1,nci);
}
myComm->bcast(sucess);
myComm->bcast(eigVal.data(),eigVal.size(),0,myComm->getMPI());
if(eigV) myComm->bcast(eigVec.data(),eigVec.size1()*eigVec.size2(),0,myComm->getMPI());
return sucess;
}
// Reads the selectedCI options from the XML node `cur`.
//
// Before the node is read, maxit, cutoff_list and cutoff_diag are reset to 0 and
// build_full_hamiltonian to true. The recognised parameter names are
// "full_hamiltonian", "output_filename" / "output" (both bound to output_filename),
// "cutoff_list", "cutoff_diag", "maxit" and "diag_steps".
//
// Returns false when `cur` is NULL, true otherwise.
bool selectedCI::parse(xmlNodePtr cur)
{
  if(cur == NULL)
    return false;

  maxit=0;
  cutoff_list=cutoff_diag=0;
  build_full_hamiltonian = true;

  std::string str("yes");
  ParameterSet m_param;
  m_param.add(str,"full_hamiltonian","std::string");
  m_param.add(output_filename,"output_filename","std::string");
  m_param.add(output_filename,"output","std::string");
  m_param.add(cutoff_list,"cutoff_list","double");
  m_param.add(cutoff_diag,"cutoff_diag","double");
  m_param.add(maxit,"maxit","int");
  m_param.add(diag_in_steps,"diag_steps","int");
  m_param.put(cur);

  // tolower() is only defined for values representable as unsigned char (or EOF),
  // so each character is converted through unsigned char before the call.
  std::transform(str.begin(),str.end(),str.begin(),
                 [](unsigned char c) { return static_cast<char>(tolower(c)); });
  if(str == "no" || str == "false") build_full_hamiltonian = false;
  return true;
}
// Prepares the driver: stores the Hamiltonian, sets up the task group, splits the
// communicator into node-local and task-group-local communicators, sets up the
// communication buffer and initializes the Hamiltonian.
//
// h0 must be non-NULL and must be a SparseGeneralHamiltonian (checked with
// dynamic_cast; the run aborts otherwise). w0, p0 and wf0 are not used: the walker
// set, propagator and wave-function pointers of the driver are set to NULL.
//
// Returns false when h0 is NULL or when the Hamiltonian fails to initialize.
bool selectedCI::setup(HamPtr h0, WSetPtr w0, PropPtr p0, WfnPtr wf0)
{
  if(h0==NULL) {
    app_error()<<" Error: Null Hamiltonian pointer in selectedCI::setup(). \n";
    return false;
  }
  ham0=h0;
  wlkBucket=NULL;
  prop0=NULL;
  wfn0=NULL;

  sHam = dynamic_cast<SparseGeneralHamiltonian*>(ham0);
  if(!sHam) {
    app_error()<<" Error in selectedCI::setup. \n"
               <<" Hamiltonian associated with selectedCI must be of the \n"
               <<" type SparseGeneralHamiltonian. \n";
    APP_ABORT(" Error in selectedCI::setup: Hamiltonian is not a SparseGeneralHamiltonian. \n");
  }

  app_log()<<"\n****************************************************\n"
           <<"****************************************************\n"
           <<"****************************************************\n"
           <<" Beginning Driver initialization.\n"
           <<"****************************************************\n"
           <<"****************************************************\n"
           <<"****************************************************\n"
           <<std::endl;
  app_log()<<" Using " <<ncores_per_TG <<" cores per node in a TaskGroup. \n";

  // right now this TG is not used. It is needed for setup purposes and to
  // get a unique TG number for every group of cores on a node (used in the WalkerSet)
  TG.setup(ncores_per_TG,1,false);
  std::vector<int> TGdata(5);
  TG.getSetupInfo(TGdata);

  // setup local-to-node MPI Comm
  // TGdata[0]: node_number
  myComm->split_comm(TGdata[0],MPI_COMM_NODE_LOCAL);
  TG.setNodeCommLocal(MPI_COMM_NODE_LOCAL);
  // one communicator per task group, keyed by the task-group number
  int key = TG.getTGNumber();
  myComm->split_comm(key,MPI_COMM_TG_LOCAL);
  TG.setTGCommLocal(MPI_COMM_TG_LOCAL);

  CommBuffer.setup(TG.getCoreRank()==0,std::string("COMMBuffer_")+std::to_string(myComm->rank()),MPI_COMM_TG_LOCAL);
  TG.setBuffer(&CommBuffer);

  app_log()<<"\n****************************************************\n"
           <<" Initializating Hamiltonian \n"
           <<"****************************************************\n"
           <<std::endl;
  // hamiltonian
  if(!ham0->init(TGdata,&CommBuffer,MPI_COMM_TG_LOCAL,MPI_COMM_NODE_LOCAL)) {
    app_error()<<"Error initializing Hamiltonian in selectedCI::setup" <<std::endl;
    return false;
  }
  // only the real part of the nuclear Coulomb energy is kept
  NuclearCoulombEnergy = toComplex(sHam->NuclearCoulombEnergy).real();

  app_log()<<"\n****************************************************\n"
           <<"****************************************************\n"
           <<"****************************************************\n"
           <<" Finished Driver initialization.\n"
           <<"****************************************************\n"
           <<"****************************************************\n"
           <<"****************************************************\n"
           <<std::endl;
  return true;
}
// Checkpoint writer. Currently a stub: nothing is written, both arguments
// are ignored and true is always returned.
bool selectedCI::checkpoint(int /*block*/, int /*step*/)
{
  return true;
}
// sets up restart archive and reads
bool selectedCI::restart(hdf_archive& read)
{
return true;
}
// Currently a stub: no state is released and true is always returned.
bool selectedCI::clear()
{
  return true;
}
}

105
src/AFQMC/Drivers/selectedCI.h Executable file
View File

@ -0,0 +1,105 @@
#ifndef QMCPLUSPLUS_AFQMC_SELECTEDCI_H
#define QMCPLUSPLUS_AFQMC_SELECTEDCI_H
#include<Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "AFQMC/config.h"
#include "AFQMC/Drivers/Driver.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
namespace qmcplusplus
{
// Driver that builds a determinant expansion iteratively (see run()).
//
// Determinants are stored back to back in flat vectors of IndexType, one record of
// NAEA+NAEB orbital indices per determinant. The helpers at the bottom of the class
// compare, order and search such records.
class selectedCI: public Driver
{

  typedef HamiltonianBase* HamPtr;
  typedef AFQMCInfo* InfoPtr;

  public:

  // Members are initialized in declaration order.
  selectedCI(Communicate *c):Driver(c),sHam(NULL),maxit(5),
        diag_in_steps(-1),cutoff_list(0.1),cutoff_diag(0.1),
        output_filename("selectedCI"),build_full_hamiltonian(true),
        NuclearCoulombEnergy(0)
  {
    name = "selectedCI";
    project_title = "selectedCI";
  }

  ~selectedCI() {}

  bool run();

  bool parse(xmlNodePtr);

  bool setup(HamPtr,WSetPtr,PropPtr,WfnPtr);

  bool checkpoint(int,int);

  bool restart(hdf_archive&);

  bool clear();

  // Diagonalizes in the space of the nci1 determinants of occ1 followed by the nci2
  // determinants of occ2; the eigenvector is only produced when eigV is true.
  bool diagonalizeTrialWavefunction(std::vector<RealType>& eigVal, ValueMatrix& eigVec, std::vector<IndexType>& occ1, int nci1, std::vector<IndexType>& occ2, int nci2, bool eigV=true);

  protected:

  // ham0 down-cast to the concrete type required by this driver (set in setup())
  SparseGeneralHamiltonian* sHam;

  // maximum number of selection iterations
  int maxit;
  // when > 0, step used by run() for the final energy-vs-#determinants scan
  int diag_in_steps;
  // threshold used when generating excitations
  double cutoff_list;
  // threshold on the eigenvector coefficients of new determinants
  double cutoff_diag;
  std::string output_filename;
  bool build_full_hamiltonian;
  RealType NuclearCoulombEnergy;

  // permanent determinant list and its coefficients (one per determinant)
  std::vector<IndexType> occ_orbs;
  std::vector<ValueType> ci;
  // scratch record holding the pivot determinant during sorting
  std::vector<IndexType> val_at_pivot;

  void sort_multiple(std::vector<IndexType>::iterator left, std::vector<IndexType>::iterator right, std::vector<ValueType>::iterator v1, std::vector<ValueType>::iterator v2);

  void sort_list(std::vector<IndexType>::iterator left, std::vector<IndexType>::iterator right);

  void remove_repeated(std::vector<IndexType>& vnew, std::vector<IndexType>& vold);

  // True when the two records of NAEA+NAEB entries are element-wise equal.
  inline bool list_equal(std::vector<IndexType>::iterator left, std::vector<IndexType>::iterator right)
  {
    for(int i=0; i<NAEA+NAEB; i++)
      if(!(*(left++) == *(right++))) return false;
    return true;
  }

  // Strict lexicographic "less than" between two records of NAEA+NAEB entries.
  inline bool list_order(std::vector<IndexType>::iterator left, std::vector<IndexType>::iterator right)
  {
    for(int i=0; i<NAEA+NAEB; i++,left++,right++)
      if(*(left) == *(right))
        continue;
      else
        return *left < *right;
    return false; // they are equal
  }

  // Binary search for the record `val` in the sorted record list [first,last).
  //
  // Returns true when an equal record exists. In both cases `first` is moved to the
  // first record that is not ordered before `val` (or to `last` when there is none),
  // so that a caller searching for increasing values can reuse it as the start of
  // the next search. Only records inside [first,last) are ever dereferenced.
  inline bool mysearch(std::vector<IndexType>::iterator& first, std::vector<IndexType>::iterator last, std::vector<IndexType>::iterator val)
  {
    typedef std::vector<IndexType>::difference_type diff_t;
    const diff_t ne = NAEA+NAEB;
    if(ne <= 0) return false;
    const std::vector<IndexType>::iterator base = first;
    const diff_t nrec = std::distance(base,last)/ne;
    diff_t lo = 0, hi = nrec;
    while (lo < hi) {
      const diff_t mid = lo + (hi-lo)/2;
      if(list_order(base+mid*ne,val))
        lo = mid+1;
      else
        hi = mid;
    }
    if(lo > 0) first = base + lo*ne;
    return (lo < nrec) && list_equal(first,val);
  }

};
}
#endif

View File

@ -0,0 +1,370 @@
#ifndef QMCPLUSPLUS_AFQMC_BASICESTIMATOR_H
#define QMCPLUSPLUS_AFQMC_BASICESTIMATOR_H
#include<Message/MPIObjectBase.h>
#include"AFQMC/config.h"
#include<vector>
#include<string>
#include<iostream>
#include<fstream>
#include "io/hdf_archive.h"
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Estimators/SlaterDetOperations.h"
namespace qmcplusplus
{
class BasicEstimator: public EstimatorBase
{
public:
typedef HamiltonianBase* HamPtr;
typedef WavefunctionHandler* WfnPtr;
typedef PropagatorBase* PropPtr;
typedef WalkerHandlerBase* WSetPtr;
BasicEstimator(Communicate *c):EstimatorBase(c),EstimEloc(false), timers(true), prtnwalk(true),
overlap(false), diag(false), nstates(0),SDet(c)
{}
~BasicEstimator() {}
void accumulate_block(WSetPtr wlkBucket)
{
int nW = wlkBucket->numWalkers(true);
ComplexType *sm,eloc,dum,oa,ob,w,ooa,oob;
LocalTimer->start("Block::EstimatorEloc");
if(EstimEloc) {
wfn0->evaluateLocalEnergyAndOverlap("Estimator",-1,wlkBucket);
if(core_rank==0) {
for(int i=0, cnt=0; i<nW; i++) {
if(!wlkBucket->isAlive(i) || std::abs(wlkBucket->getWeight(i)) <= 1e-6 || std::isnan(wlkBucket->getWeight(i).real())) continue;
sm = wlkBucket->getWalker(i,w,dum,ooa,oob); // "impsampl"
sm = wlkBucket->getWalker2(i,eloc,oa,ob); // "estimator"
if(std::isnan(ooa.real()) || std::isnan(oob.real()) || std::abs(oa*ob) < 1e-8 || std::abs(ooa*oob) < 1e-8) w=0;
dum = w*oa*ob/(ooa*oob);
edeno2 += dum;
enume2 += eloc*dum;
}
}
}
LocalTimer->stop("Block::EstimatorEloc");
}
void accumulate_step(WSetPtr wlkBucket)
{
ncalls++;
int nw=0;
RealType instant_weight=0.0;
if(core_rank==0) {
int nW = wlkBucket->numWalkers(true);
enume_sub = edeno_sub = 0.0;
ComplexType w,oa,ob,eloc;
for(int i=0; i<nW; i++) {
ComplexType* dum = wlkBucket->getWalker(i,w,eloc,oa,ob);
if(!wlkBucket->isAlive(i) || std::abs(w) <= 1e-6) continue;
enume_sub += w*eloc;
edeno_sub += w;
nw++;
instant_weight += std::abs(w);
}
if(nw>nwalk_max) nwalk_max=nw;
if(nw<nwalk_min) nwalk_min=nw;
data2[0] = enume_sub.real();
data2[1] = edeno_sub.real();
data2[4] = nwalk_sub/ncalls_substep;
data2[5] = instant_weight;
data2[6] = weight_sub/ncalls_substep;
data2[7] = ovlp_sub/ncalls_substep;
myComm->allreduce(data2,MPI_COMM_TG_LOCAL_HEADS);
wlkBucket->scaleWeight(targetW/data2[5]);
data2[0] *= targetW/data2[5];
data2[1] *= targetW/data2[5];
nwalk += nwalk_sub/ncalls_substep;
enume += enume_sub*targetW/data2[5];
edeno += edeno_sub*targetW/data2[5];
weight += instant_weight;
ovlp += ovlp_sub/ncalls_substep;
enume_sub=0.0;
edeno_sub=0.0;
nwalk_sub = 0;
weight_sub=0.0;
ncalls_substep=0;
ovlp_sub=0;
}
myComm->bcast(data2,MPI_COMM_TG_LOCAL);
}
void accumulate_substep(WSetPtr wlkBucket)
{
if(core_rank != 0) return;
ncalls_substep++;
int nW = wlkBucket->numWalkers(true);
ComplexType w,oa,ob,eloc;
int cnt1=0;
RealType sumo=0;
for(int i=0; i<nW; i++) {
ComplexType* dum = wlkBucket->getWalker(i,w,eloc,oa,ob);
if(!wlkBucket->isAlive(i) || std::abs(w) <= 1e-6) continue;
cnt1++;
nwalk_sub++;
enume_sub += w*eloc;
edeno_sub += w;
weight_sub += std::abs(w);
sumo += std::abs(oa*ob);
}
if(cnt1>1) ovlp_sub+=sumo/static_cast<double>(cnt1);
}
void tags(std::ofstream& out)
{
if(myComm->rank() == 0) {
out<<"nWalkers weight Eloc_nume Eloc_deno ";
if(EstimEloc) out<<"ElocEstim_nume ElocEstim_deno ";
out<<"Ovlp ";
if(diag && nstates>0) {
for(int i=0; i<nstates; i++) out<<"Ediag_" <<i <<" ";
#ifdef AFQMC_TIMER
out<<"evaluate_H_S solve_GEV" <<" ";
#endif
}
if(timers && EstimEloc) out<<"TimeEstimEloc ";
if(timers) out<<"TimePropg TimePopControl TimeLoadBalance TimeOrtho TimeCommSetup TimeCommResize TimeCommExch TimeBlock ";
if(prtnwalk) out<<"nWmin nWmax ";
}
}
void print(std::ofstream& out,WSetPtr wlkBucket)
{
// average over processors
if(core_rank==0) {
data[0] = enume.real()/ncalls;
data[1] = edeno.real()/ncalls;
data[2] = enume2.real();
data[3] = edeno2.real();
data[4] = nwalk/ncalls;
data[6] = weight/ncalls;
data[7] = ovlp/num_heads_tg;
data[5] = 0.0;
int nW = wlkBucket->numWalkers(true);
for(int i=0; i<nW; i++)
if(wlkBucket->isAlive(i)) data[5] += std::abs(wlkBucket->getWeight(i));
myComm->allreduce(data,MPI_COMM_TG_LOCAL_HEADS);
}
myComm->bcast(data,MPI_COMM_TG_LOCAL);
if(writer) {
out<<std::setprecision(6) <<data[4] <<" " <<data[6] <<" " <<std::setprecision(16) <<data[0] <<" " <<data[1] <<" ";
if(EstimEloc) out<<data[2] <<" " <<data[3] <<" ";
out<<data[7] <<" ";
if(diag && nstates>0) {
for(int i=0; i<nstates; i++) out<<eigVal[i] <<" ";
#ifdef AFQMC_TIMER
out<<LocalTimer->total("SlaterDetOperations::diag::evaluate_H_S") <<" "
<<LocalTimer->total("SlaterDetOperations::diag::solve_GEV") <<" ";
LocalTimer->reset("SlaterDetOperations::diag::evaluate_H_S");
LocalTimer->reset("SlaterDetOperations::diag::solve_GEV");
#endif
}
if(timers && EstimEloc)
out<<std::setprecision(3) <<LocalTimer->total("Block::EstimatorEloc") <<" ";
if(timers) out<<std::setprecision(3) <<LocalTimer->total("SubStep::Propagate") <<" "
<<LocalTimer->total("Step::PopControl") <<" "
<<LocalTimer->total("Step::loadBalance") <<" "
<<LocalTimer->total("Step::Orthogonalize") <<" "
<<LocalTimer->total("WalkerHandler::loadBalance::setup") <<" "
<<LocalTimer->total("WalkerHandler::loadBalance::resize") <<" "
<<LocalTimer->total("WalkerHandler::loadBalance::exchange") <<" "
<<LocalTimer->total("Block::TOTAL") <<" ";
if(prtnwalk) out<<wlkBucket->nwalk_min <<" " <<wlkBucket->nwalk_max <<std::setprecision(12) <<" ";
}
enume=0.0;
edeno=0.0;
weight=0.0;
enume2=0.0;
edeno2=0.0;
ncalls=0;
nwalk=0;
nwalk_min=1000000;
nwalk_max=0;
ovlp=0;
LocalTimer->reset("SubStep::Propagate");
LocalTimer->reset("Block::EstimatorEloc");
LocalTimer->reset("Step::PopControl");
LocalTimer->reset("Step::loadBalance");
LocalTimer->reset("Step::Orthogonalize");
LocalTimer->reset("WalkerHandler::loadBalance::setup");
LocalTimer->reset("WalkerHandler::loadBalance::exchange");
LocalTimer->reset("WalkerHandler::loadBalance::resize");
LocalTimer->reset("Block::TOTAL");
}
double getWeight()
{
return data[5];
}
double getWeight_step()
{
return data2[5];
}
double getEloc()
{
return data[0]/data[1];
}
double getEloc_step()
{
return data2[0]/data2[1];
}
void average(WSetPtr wlks) {}
bool parse(xmlNodePtr cur)
{
if(cur==NULL) return false;
ParameterSet m_param;
std::string str1,str2,str3,str4,str5;
m_param.add(str1,"estim_eloc","std::string");
m_param.add(str1,"estimeloc","std::string");
m_param.add(str1,"EstimEloc","std::string");
m_param.add(str2,"timers","std::string");
m_param.add(str3,"nwalk","std::string");
m_param.add(str4,"overlap","std::string");
m_param.add(str5,"diag","std::string");
m_param.add(nstates,"nstates","int");
m_param.put(cur);
std::transform(str1.begin(),str1.end(),str1.begin(),(int (*)(int)) tolower);
std::transform(str2.begin(),str2.end(),str2.begin(),(int (*)(int)) tolower);
std::transform(str3.begin(),str3.end(),str3.begin(),(int (*)(int)) tolower);
std::transform(str4.begin(),str4.end(),str4.begin(),(int (*)(int)) tolower);
std::transform(str5.begin(),str5.end(),str5.begin(),(int (*)(int)) tolower);
if(str1 == "yes" || str1 == "true")
EstimEloc = true;
if(str2 == "no" || str2 == "false")
timers = false;
if(str3 == "no" || str3 == "false")
prtnwalk = false;
if(str1 != "") EstimEloc_present=true;
if(str4 == "yes" || str4 == "true") overlap=true;
if(str5 == "yes" || str5 == "true") diag=true;
return true;
}
bool setup(std::vector<int>& TGdata, ComplexSMVector *v, HamiltonianBase* ham, WavefunctionHandler* wfn,myTimer* timer, MPI_Comm heads_comm, MPI_Comm tg_comm, MPI_Comm node_comm, MPI_Comm heads_of_tg_comm)
{
ncores_per_TG=TGdata[4];
core_rank = TGdata[1]%ncores_per_TG;
ham0=ham;
wfn0=wfn;
if(!EstimEloc_present) EstimEloc = wfn0->check_initialized("Estimator");
writer = (myComm->rank()==0);
LocalTimer = timer;
MPI_COMM_TG_LOCAL_HEADS = heads_of_tg_comm;
MPI_COMM_HEAD_OF_NODES = heads_comm;
MPI_COMM_NODE_LOCAL = node_comm;
MPI_COMM_TG_LOCAL = tg_comm;
MPI_Comm_size(MPI_COMM_TG_LOCAL_HEADS,&num_heads_tg);
if(overlap || (diag && nstates>0) ) {
SDet.copyInfo(*this);
SDet.setup(ham,LocalTimer);
eigVal.resize(nstates);
eigVec.resize(nstates,nstates);
}
data.resize(10);
data2.resize(10);
enume=0.0;
edeno=0.0;
enume_sub=0.0;
edeno_sub=0.0;
enume2=0.0;
edeno2=0.0;
weight=0.0;
weight_sub=0.0;
nwalk = 0;
nwalk_sub = 0;
ncalls=0;
ncalls_substep=0;
nwalk_min=1000000;
nwalk_max=0;
return true;
}
// Stores `w0` as the target weight (targetW).
void setTargetWeight(RealType w0)
{
  targetW = w0;
}
private:
// Result buffers, resized to 10 entries in setup(). getEloc() returns data[0]/data[1] and
// getWeight() returns data[5]; data2 holds the per-step counterparts (get*_step()).
std::vector<double> data, data2;
// Accumulators, zeroed in setup().
// presumably numerator / denominator of the energy estimator -- TODO confirm against accumulate_*
ComplexType enume=0.0,edeno=0.0;
ComplexType enume_sub=0.0,edeno_sub=0.0;
ComplexType enume2=0.0,edeno2=0.0;
// NOTE: no default initializers on these four.
RealType weight, weight_sub, ovlp, ovlp_sub;
// Target weight, set through setTargetWeight().
RealType targetW=1;
// Counters, reset in setup(); nwalk_min starts at 1000000 and nwalk_max at 0.
int nwalk, ncalls, ncalls_substep, nwalk_sub, nwalk_min, nwalk_max;
// TGdata[1] % ncores_per_TG, computed in setup().
int core_rank;
// TGdata[4], read in setup().
int ncores_per_TG;
// optional
// Flags set from the XML input in parse().
bool EstimEloc, timers, prtnwalk;
// True when the input gave an EstimEloc option; otherwise setup() asks the wavefunction.
bool EstimEloc_present;
bool overlap, diag;
// "nstates" input parameter; used to size eigVal / eigVec in setup().
int nstates;
// Set up in setup() only when overlap, or diag with nstates>0, is requested.
SlaterDetOperations SDet;
std::vector<RealType> eigVal;
ComplexMatrix eigVec;
ComplexType exactEnergy;
// Not owned: pointer handed in through setup().
myTimer* LocalTimer;
// Size of MPI_COMM_TG_LOCAL_HEADS, queried in setup().
int num_heads_tg;
// Communicators handed in through setup().
MPI_Comm MPI_COMM_HEAD_OF_NODES;
MPI_Comm MPI_COMM_NODE_LOCAL;
MPI_Comm MPI_COMM_TG_LOCAL;
MPI_Comm MPI_COMM_TG_LOCAL_HEADS;
};
}
#endif

View File

@ -0,0 +1,76 @@
#ifndef QMCPLUSPLUS_AFQMC_ESTIMATORBASE_H
#define QMCPLUSPLUS_AFQMC_ESTIMATORBASE_H
#include<Message/MPIObjectBase.h>
#include"AFQMC/config.h"
#include<vector>
#include<iostream>
#include<fstream>
#include "io/hdf_archive.h"
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Propagators/PropagatorBase.h"
namespace qmcplusplus
{
// Abstract interface implemented by every AFQMC estimator.
//
// Derived classes must implement the accumulate_block/accumulate_step, print, tags, average,
// parse and setup hooks. The getters and setTargetWeight() have neutral default
// implementations (return 0 / do nothing).
class EstimatorBase: public MPIObjectBase, public AFQMCInfo
{

  public:

  typedef HamiltonianBase* HamPtr;
  typedef WavefunctionHandler* WfnPtr;
  typedef PropagatorBase* PropPtr;
  typedef WalkerHandlerBase* WSetPtr;

  // All non-owning pointers start out NULL and `writer` starts out false, so that a derived
  // class never reads indeterminate values before its setup() has run.
  EstimatorBase(Communicate *c):MPIObjectBase(c), writer(false), ham0(NULL), wfn0(NULL), prop0(NULL) {}

  // Virtual: estimators are handled through EstimatorBase* and must be destroyed correctly
  // when deleted through such a pointer.
  virtual ~EstimatorBase() {}

  // Accumulation hooks, called with the current walker set.
  virtual void accumulate_block(WSetPtr wlks)=0;

  virtual void accumulate_step(WSetPtr wlks)=0;

  // Optional hook; the default does nothing.
  virtual void accumulate_substep(WSetPtr wlks) {}

  // Writes this estimator's values for the current block to `out`.
  virtual void print(std::ofstream& out,WalkerHandlerBase* wlks)=0;

  // Writes this estimator's column labels to `out`.
  virtual void tags(std::ofstream& out)=0;

  virtual void average(WSetPtr wlks)=0;

  // Reads the estimator's options from an XML node; returns false on failure.
  virtual bool parse(xmlNodePtr)=0;

  // Connects the estimator to the Hamiltonian, wavefunction, timer and communicators;
  // returns false on failure.
  virtual bool setup(std::vector<int>& TGdata, ComplexSMVector *v,HamiltonianBase*,WavefunctionHandler*,myTimer* LocalTimer, MPI_Comm heads_comm, MPI_Comm tg_comm, MPI_Comm node_comm, MPI_Comm cm)=0;

  // Default getters return 0; estimators that provide these quantities override them.
  virtual double getEloc() {return 0;}

  virtual double getEloc_step() {return 0;}

  virtual double getWeight() {return 0;}

  virtual double getWeight_step() {return 0;}

  // Default ignores the target weight.
  virtual void setTargetWeight(RealType w0) {}

  protected:

  // Whether this object writes output.
  bool writer;

  std::string filename, filetype, ID;

  // Non-owning pointers to collaborating objects.
  HamPtr ham0;
  WfnPtr wfn0;
  PropPtr prop0;

};
}
#endif

View File

@ -0,0 +1,148 @@
#ifndef QMCPLUSPLUS_AFQMC_ESTIMATORHANDLER_H
#define QMCPLUSPLUS_AFQMC_ESTIMATORHANDLER_H
#include<Message/MPIObjectBase.h>
#include"AFQMC/config.h"
#include"AFQMC/Estimators/EstimatorBase.h"
#include"AFQMC/Estimators/BasicEstimator.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
namespace qmcplusplus
{
// Owns the list of estimators of an AFQMC run and forwards accumulate / print / setup calls
// to them. The first entry (index 0) is always a BasicEstimator, created in parse(); the
// getters and accumulate_substep() talk to that entry only, so parse() must have been called
// before they are used.
class EstimatorHandler: public MPIObjectBase, public AFQMCInfo
{
  public:

  EstimatorHandler(Communicate *c):MPIObjectBase(c),filename("energy.dat") {}

  // NOTE(review): the estimators created in parse() are raw pointers that are never deleted
  // (neither here nor when parse() clears the list). Releasing them through EstimatorBase*
  // is only well-defined if EstimatorBase has a virtual destructor.
  ~EstimatorHandler() {}

  double getEloc()
  {
    return estimators[0]->getEloc();
  }

  double getEloc_step()
  {
    return estimators[0]->getEloc_step();
  }

  double getWeight()
  {
    return estimators[0]->getWeight();
  }

  double getWeight_step()
  {
    return estimators[0]->getWeight_step();
  }

  // meant for very cheap accumulations
  void accumulate_substep(WalkerHandlerBase* wlks)
  {
    // only on basic for now
    estimators[0]->accumulate_substep(wlks);
  }

  // Writes one output line: block index, time, every estimator's columns, then Es and Eav.
  // The stream is flushed every 10 blocks.
  void print(int block, double time, double Es, double Eav, WalkerHandlerBase* wlks)
  {
    out<<block <<" " <<time <<" ";
    for(std::vector<EstimatorBase*>::iterator it=estimators.begin(); it!=estimators.end(); it++)
      (*it)->print(out,wlks);
    out<<std::setprecision(12) <<Es <<" " <<Eav;
    out<<std::endl;
    if( (block+1)%10==0 ) out.flush();
  }

  // 1) accumulates estimators over steps, and 2) reduces and accumulates substep estimators
  void accumulate_step(WalkerHandlerBase* wlks)
  {
    for(std::vector<EstimatorBase*>::iterator it=estimators.begin(); it!=estimators.end(); it++)
      (*it)->accumulate_step(wlks);
  }

  // forwards the end-of-block accumulation to every estimator
  void accumulate_block(WalkerHandlerBase* wlks)
  {
    for(std::vector<EstimatorBase*>::iterator it=estimators.begin(); it!=estimators.end(); it++)
      (*it)->accumulate_block(wlks);
  }

  void average(WalkerHandlerBase* wlks)
  {
    for(std::vector<EstimatorBase*>::iterator it=estimators.begin(); it!=estimators.end(); it++)
      (*it)->average(wlks);
  }

  // Creates the basic estimator and configures it from every <Estimator> child of `cur` whose
  // name attribute is "basic", "Basic" or "standard".
  // Returns false when `cur` is NULL or when the basic estimator fails to parse its node.
  bool parse(xmlNodePtr cur)
  {
    if(cur==NULL) return false;

    estimators.reserve(10);
    estimators.clear();

    // the basic estimator always exists and always sits at index 0
    BasicEstimator* basic = new BasicEstimator(myComm);
    basic->copyInfo(*this);
    estimators.push_back(basic);

    xmlNodePtr curRoot=cur;
    cur = curRoot->children;
    while (cur != NULL) {
      std::string cname((const char*)(cur->name));
      if(cname =="Estimator") {
        OhmmsAttributeSet oAttrib;
        oAttrib.add(name,"name");
        oAttrib.put(cur);
        // "starndard" is a historical misspelling of "standard"; it is still accepted so
        // that existing input files keep working
        if(name == "basic" || name == "Basic" || name == "standard" || name == "starndard" ) {
          if(!estimators[0]->parse(cur)) return false;
        } else {
          // create and parse new estimators
          //->copyInfo(*this);
        }
      }
      cur = cur->next;
    }

    return true;
  }

  // Estimator does not use TGs right now
  //
  // On rank 0, opens "<name>.scalar.dat" and writes the header line; then calls setup() on
  // every estimator. Returns false when the output file cannot be opened or when any
  // estimator reports a failure.
  bool setup(std::vector<int>& TGdata, ComplexSMVector* v, HamiltonianBase* ham0, WavefunctionHandler* wfn0, myTimer* LocalTimer, MPI_Comm heads_comm, MPI_Comm tg_comm, MPI_Comm node_comm, MPI_Comm head_tgs)
  {
    if(myComm->rank() == 0) {
      filename = myComm->getName()+".scalar.dat";
      //out.open(filename.c_str(),std::ios_base::app | std::ios_base::out);
      out.open(filename.c_str());
      if(out.fail()) {
        app_log()<<"Problems opening estimator output file: " <<filename <<std::endl;
        return false;
      }
      out<<"# block time ";
      for(std::vector<EstimatorBase*>::iterator it=estimators.begin(); it!=estimators.end(); it++)
        (*it)->tags(out);
      out<<"Eshift Ebound ";
      out<<std::endl;
    }

    // set up every estimator even if an earlier one failed, then report the combined status
    bool allOk = true;
    for(std::vector<EstimatorBase*>::iterator it=estimators.begin(); it!=estimators.end(); it++)
      if(!(*it)->setup(TGdata,v,ham0,wfn0,LocalTimer,heads_comm,tg_comm,node_comm,head_tgs))
        allOk = false;

    return allOk;
  }

  void setTargetWeight(RealType w0) { estimators[0]->setTargetWeight(w0); }

  private:

  // estimators[0] is the BasicEstimator created in parse()
  std::vector<EstimatorBase*> estimators;
  std::vector<std::string> tags;
  // output file name; replaced by "<name>.scalar.dat" on rank 0 in setup()
  std::string filename;
  // output stream, opened on rank 0 only
  std::ofstream out;

};
}
#endif

View File

@ -0,0 +1,388 @@
#ifndef QMCPLUSPLUS_AFQMC_SLATERDETOPERATIONS_H
#define QMCPLUSPLUS_AFQMC_SLATERDETOPERATIONS_H
#include<fstream>
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "Numerics/DeterminantOperators.h"
#include "Numerics/Blasf.h"
#include "Numerics/MatrixOperators.h"
#include "Message/Communicate.h"
//#include "Message/CommOperators.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Walkers/SlaterDetWalker.h"
#include "AFQMC/Numerics/DenseMatrixOperations.h"
#include "AFQMC/Numerics/SparseMatrixOperations.h"
namespace qmcplusplus
{
//class SlaterDetOperations: public EstimatorBase
// Operations on pairs of Slater determinants: overlaps, Green's functions, Hamiltonian
// matrix elements, and a diagonalization of the Hamiltonian in the space spanned by a set of
// walkers. A determinant is passed as a contiguous array whose first NMO*NAEA entries are
// used for the first spin block and the entries after that for the second (beta) block.
// setup() must be called before any other member is used.
class SlaterDetOperations: public MPIObjectBase, public AFQMCInfo
{
  public:

    typedef HamiltonianBase* HamPtr;

    SlaterDetOperations(Communicate *c):MPIObjectBase(c), ham(NULL), timer(NULL) {}

    // Stores the Hamiltonian and timer (neither is owned) and sizes the work arrays.
    void setup(HamPtr h, myTimer* timer_) {
      ham=h;
      timer=timer_;
      GF.resize(2*NMO,NMO);
      V0.resize(2*NMO*NMO);
      Cwork.resize(2*NMO);
      pivot.resize(2*NMO);
    }

    // Computes the overlap of determinants A and B and, when getG is true, the Green's function
    //   G = transpose( B * ( transpose(conjg(A)) * B )^-1 * transpose(conjg(A)) )
    // for both spin blocks.
    //   A, B  - determinants in the layout described on the class
    //   ovlp  - output: product of the determinants of the two spin overlap matrices
    //   G     - output, written only when getG is true; must already hold 2*NMO x NMO
    //           elements. It is set to zero when |ovlp| < 1e-6.
    //   getG  - pass false to compute only the overlap
    void green_function(ComplexType* A, ComplexType* B, ComplexType& ovlp, ComplexMatrix& G, bool getG=true) {

      const ComplexType one = ComplexType(1.0);
      const ComplexType zero = ComplexType(0.0);

      ComplexMatrix S0(NAEA,NAEA);
      ComplexMatrix S1(NAEB,NAEB);
      ComplexMatrix SS0(2*NMO,NAEA);

      if(getG) G = ComplexType(0.0);
      S0 = ComplexType(0.0);
      S1 = ComplexType(0.0);

      // S0 = transpose(conjg(A))*B
      DenseMatrixOperators::product_AhB(NAEA,NAEA,NMO,one,A,NAEA,B,NAEA,zero,S0.data(),NAEA);

      // S0 = S0^-1; Invert returns the determinant
      ovlp = Invert(S0.data(), NAEA, NAEA, Cwork.data(),pivot.data());

      // SS0 = B * S0
      if(getG) DenseMatrixOperators::product(NMO,NAEA,NAEA,B,NAEA,S0.data(),NAEA,SS0.data(),NAEA);
      // G(first block) = SS0*transpose(conjg(A))
      if(getG) DenseMatrixOperators::product_ABh(NMO,NMO,NAEA,one,SS0.data(),NAEA,A,NAEA,zero,G.data(),NMO);

      // S1 = transpose(conjg(A(beta)))*B(beta)
      DenseMatrixOperators::product_AhB(NAEB,NAEB,NMO,one,A+NMO*NAEA,NAEA,B+NAEA*NMO,NAEA,zero,S1.data(),NAEB);

      // S1 = S1^-1
      ovlp *= Invert(S1.data(), NAEB, NAEB, Cwork.data(),pivot.data());

      if(!getG) return;

      // (nearly) orthogonal determinants: report a zero Green's function
      if(std::abs(ovlp) < 1e-6) {
        G = ComplexType(0.0);
        return;
      }

      // SS0(beta) = B(beta) * S1
      DenseMatrixOperators::product(NMO,NAEB,NAEB,B+NAEA*NMO,NAEA,S1.data(),NAEB,SS0.data()+NAEA*NMO,NAEA);

      // G(beta) = SS0*transpose(conjg(A))
      DenseMatrixOperators::product_ABh(NMO,NMO,NAEB,one,SS0.data()+NAEA*NMO,NAEA,A+NAEA*NMO,NAEA,zero,G.data()+NMO*NMO,NMO);

      // transpose both NMO x NMO blocks in place
      for(int i=0; i<NMO; i++)
        for(int j=0; j<i; j++) {
          std::swap(G(i,j),G(j,i));
          std::swap(G(i+NMO,j),G(j+NMO,i));
        }
    }

    // Same as the pointer overload, operating on the storage of matrices A and B.
    void green_function(ComplexMatrix& A, ComplexMatrix& B, ComplexType& ovlp, ComplexMatrix& G, bool getG=true) {
      green_function(A.data(),B.data(),ovlp,G,getG);
    }

    // Computes the overlap <A|B> and the Hamiltonian matrix element <A|H|B>.
    // When |overlap| < 1e-6 both outputs are set to zero.
    void matrix_element_and_overlap(ComplexType* A, ComplexType* B, ComplexType& ovlp, ComplexType& hamME) {

      green_function(A,B,ovlp,GF);

      if( std::abs(ovlp) < 1e-6 ) {
        ovlp = ComplexType(0.0);
        hamME = ComplexType(0.0);
        return;
      }

      ComplexSMSpMat *V = NULL;
      std::vector<s1D<ValueType> > *h = NULL;
      const int nc1=2*NMO*NMO;
      ComplexType one = ComplexType(1.0,0.0);
      ComplexType zero = ComplexType(0.0,0.0);

      ham->getFullHam(h,V);

      hamME = ComplexType(0.0);

      // two-body part: 0.5 * GF . (V * GF)
      SparseMatrixOperators::product_SpMatV<ComplexSMSpMat>(nc1,nc1,one,*V,GF.data(),zero,V0.data());
      ComplexMatrix::iterator itG = GF.begin();
      ComplexVector::iterator itV = V0.begin();
      for(int i=0; i<nc1; i++,++itG,++itV) hamME += (*itV) * (*itG);
      hamME = 0.5*hamME+ham->NuclearCoulombEnergy;

      // one-body part: sum over the sparse entries (index, value) of h
      std::vector<s1D<ValueType> >::iterator end1 = h->end();
      itG = GF.begin();
      for(std::vector<s1D<ValueType> >::iterator it = h->begin(); it != end1; it++)
        hamME += *(itG + std::get<0>(*it)) * std::get<1>(*it);

      hamME *= ovlp;
    }

    // Builds the Hamiltonian (H) and overlap (S) matrices between all walkers in
    // [itbegin,itend) that are alive and have |weight| > 1e-3 (plus the determinant HF, when
    // given, as first state) and solves the generalized eigenvalue problem for the lowest
    // `nstates` states. Serial only: aborts when run on more than one rank.
    //   eigVal      - output: eigenvalues; set to zero when the solver fails
    //   eigVec      - output: eigenvectors, one per row (nstates x N)
    //   exactEnergy - output: weighted ratio accumulated from H and S
    //   HF          - optional extra determinant, may be NULL
    //   getEigV     - ignored: eigenvectors are always computed (see below)
    // Returns without touching the outputs when no walker qualifies.
    // Side effect: on success appends a line of sorted coefficient magnitudes to "diag.dat".
    void diag( std::vector<SlaterDetWalker>::iterator itbegin, std::vector<SlaterDetWalker>::iterator itend, int nstates, std::vector<RealType>& eigVal, ComplexMatrix& eigVec, ComplexType& exactEnergy, ComplexMatrix* HF, bool getEigV=false ) {

      if(myComm->size() > 1 )
        APP_ABORT(" ERROR: Estimators::SlaterDetOperations::diag(): Only implemented in serial. \n");

      int N = 0;
      for( std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1!=itend; it1++)
        if(it1->alive && std::abs(it1->weight) > 1e-3) N++;
      if(N == 0) return;
      if(HF!=NULL) N++;
      nstates = std::min(nstates,N);

      ComplexMatrix H(N),S(N);
      exactEnergy = ComplexType(0.0);
      ComplexType nume=ComplexType(0.0);
      ComplexType deno=ComplexType(0.0);

#ifdef AFQMC_TIMER
      timer->start("SlaterDetOperations::diag::evaluate_H_S");
#endif
      int i=0;
      if(HF!=NULL){ // always include HF state in list
        int j=i;
        matrix_element_and_overlap(HF->data(),HF->data(),S(i,j),H(i,j));
        j++;
        for( std::vector<SlaterDetWalker>::iterator it2 = itbegin; it2!=itend; it2++) {
          if( !(it2->alive && std::abs(it2->weight) > 1e-3) ) continue;
          matrix_element_and_overlap(HF->data(),(it2->SlaterMat).data(),S(i,j),H(i,j));
          if(i!=j) {
            H(j,i)=std::conj(H(i,j));
            S(j,i)=std::conj(S(i,j));
          }
          j++;
        }
        i++;
      }

      // upper triangle over the qualifying walkers; the lower triangle is filled by conjugation
      for( std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1!=itend; it1++) {
        if( !(it1->alive && std::abs(it1->weight) > 1e-3) ) continue;
        int j=i;
        for( std::vector<SlaterDetWalker>::iterator it2 = it1; it2!=itend; it2++) {
          if( !(it2->alive && std::abs(it2->weight) > 1e-3) ) continue;
          matrix_element_and_overlap((it1->SlaterMat).data(),(it2->SlaterMat).data(),S(i,j),H(i,j));
          nume += it1->weight*it2->weight*H(i,j);
          deno += it1->weight*it2->weight*S(i,j);
          if(i!=j) {
            H(j,i)=std::conj(H(i,j));
            S(j,i)=std::conj(S(i,j));
            nume += std::conj(it1->weight)*it2->weight*H(j,i)/(std::conj(std::get<0>(it1->overlap_alpha)*std::get<0>(it1->overlap_beta)) * std::get<0>(it2->overlap_alpha)*std::get<0>(it2->overlap_beta) );
            deno += std::conj(it1->weight)*it2->weight*S(j,i)/(std::conj(std::get<0>(it1->overlap_alpha)*std::get<0>(it1->overlap_beta)) * std::get<0>(it2->overlap_alpha)*std::get<0>(it2->overlap_beta) );
          }
          j++;
        }
        i++;
      }
      exactEnergy = nume/deno;
#ifdef AFQMC_TIMER
      timer->stop("SlaterDetOperations::diag::evaluate_H_S");
      timer->start("SlaterDetOperations::diag::solve_GEV");
#endif

      if(nstates > 0) {
        std::vector<int> ifail(N);
        eigVal.resize(nstates);
        // the coefficient dump below reads eigVec, so eigenvectors are always requested
        getEigV=true;
        if(getEigV)
          eigVec.resize(nstates,N);
        bool success = DenseMatrixOperators::genHermitianEigenSysSelect(N,H.data(),N,S.data(),N,nstates,eigVal.data(),getEigV,eigVec.data(),eigVec.size2(),ifail.data());
        if(!success) {
          for(int k=0; k<nstates; k++) eigVal[k]=0.0;
        } else {
          // Dump the sorted coefficient magnitudes of the second state (row 1). When only one
          // state was computed row 1 does not exist, so fall back to row 0 instead of reading
          // past the end of eigVec.
          const int row = (nstates > 1) ? 1 : 0;
          std::ofstream out("diag.dat",std::ios_base::app | std::ios_base::out);
          std::vector<double> coeff(N);
          for(int k=0; k<N; k++) coeff[k] = std::abs(eigVec(row,k));
          std::sort(coeff.begin(),coeff.end());
          for(int k=0; k<N; k++) out<<coeff[k] <<" ";
          out<<std::endl;
          out.close();
        }
      }
#ifdef AFQMC_TIMER
      timer->stop("SlaterDetOperations::diag::solve_GEV");
#endif
    }

    // Returns sum_i w_i / sqrt(| sum_{i<=j} ( conj(w_i) w_j <i|j> / (conj(o_i) o_j) + c.c. ) |)
    // over the alive walkers in [itbegin,itend), where w and o are the values that
    // unpackFromChar() returns for each walker.
    // Serial only: the multi-rank version was never finished (no data was exchanged between
    // ranks and the result was taken from a zero-filled vector), so the call aborts when run
    // on more than one rank, like diag().
    // Returns 0 when there is no alive walker.
    ComplexType overlap( std::vector<SlaterDetWalker>::iterator itbegin, std::vector<SlaterDetWalker>::iterator itend ) {

      if(myComm->size() > 1 )
        APP_ABORT(" ERROR: Estimators::SlaterDetOperations::overlap(): Only implemented in serial. \n");

      int nW = 0;
      for(std::vector<SlaterDetWalker>::iterator it1 = itbegin; it1!=itend; it1++)
        if(it1->alive) nW++;
      // nothing to do; this also avoids dereferencing itbegin on an empty range
      if(nW == 0) return ComplexType(0.0);

      // serialize the alive walkers into one contiguous buffer
      const int sz = itbegin->sizeForDump();
      std::vector<char> buffer_out(nW*sz);
      int cnt=0;
      for(std::vector<SlaterDetWalker>::iterator it=itbegin; it!=itend; it++)
        if(it->alive) {
          it->dumpToChar( buffer_out.data()+cnt );
          cnt+=sz;
        }

      ComplexMatrix A(2*NMO,NAEA);
      ComplexMatrix B(2*NMO,NAEA);
      ComplexMatrix G(1);   // not written: green_function is called with getG=false

      ComplexType sum_w = ComplexType(0.0);
      ComplexType sum_ov = ComplexType(0.0);
      ComplexType w1, w2, o1, o2, e1, e2, ov;
      for(int i=0; i<nW; i++) {
        itbegin->unpackFromChar(buffer_out.data()+sz*i,A,w1,e1,o1);
        sum_w += w1;
        for(int j=i; j<nW; j++) {
          itbegin->unpackFromChar(buffer_out.data()+j*sz,B,w2,e2,o2);
          green_function(A,B,ov,G,false);
          sum_ov += std::conj(w1)*w2*ov/(std::conj(o1)*o2)
                  + std::conj(w2)*w1*std::conj(ov)/(std::conj(o2)*o1);
        }
      }

      return sum_w/std::sqrt(std::abs(sum_ov));
    }

  private:

    // scratch space for Invert(), sized 2*NMO in setup()
    std::vector<ComplexType> Cwork;
    std::vector<int> pivot;

    // not owned
    HamPtr ham;

    // work arrays for matrix_element_and_overlap(): Green's function (2*NMO x NMO) and V*GF
    ComplexMatrix GF;
    ComplexVector V0;

    // not owned; only used when AFQMC_TIMER is defined
    myTimer* timer;

};
}
#endif

View File

@ -0,0 +1,460 @@
#include<cstdlib>
#include<algorithm>
#include<complex>
#include<iostream>
#include<fstream>
#include<map>
#include<utility>
#if defined(USE_MPI)
#include<mpi.h>
#endif
#include "OhmmsData/libxmldefs.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "Utilities/SimpleParser.h"
#include "Configuration.h"
#include "io/hdf_archive.h"
#include "Message/CommOperators.h"
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/sync/interprocess_condition.hpp>
#include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include "AFQMC/config.h"
#include "AFQMC/Hamiltonians/ProjectorBase.h"
#include "AFQMC/Hamiltonians/SparseGeneralHamiltonian.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Hamiltonians/CCProjector.h"
#include "AFQMC/Numerics/DenseMatrixOperations.h"
#include "AFQMC/Numerics/SparseMatrixOperations.h"
namespace qmcplusplus
{
bool CCProjector::initFromGuess()
{
SparseGeneralHamiltonian* sHam = dynamic_cast<SparseGeneralHamiltonian*>(ham0);
if(!sHam) {
app_error()<<" Error in PureSingleDeterminant::getHamiltonian. \n"
<<" Hamiltonian associated with PureSingleDeterminant must of the \n"
<<" type SparseGeneralHamiltonian. \n";
return false;
}
app_log()<<"Initializing Cluster Projector. " <<std::endl;
na = NAEA*(NMO-NAEA);
nb = NAEB*(NMO-NAEB);
// initialize with potential energy
Pmat.resize(na+nb,na+nb);
sHam->initializeCCProjector(Pmat);
for(int i=0; i<Pmat.rows(); i++) Pmat(i,i)=0.0;
}
bool CCProjector::initFromHDF5(const std::string& fileName)
{
/*
hdf_archive dump(myComm);
if(!dump.open(fileName,H5F_ACC_RDONLY,false)) {
app_error()<<" Error opening integral file in SparseGeneralHamiltonian. \n";
return false;
}
std::string path = "/Hamiltonian/SparseGeneralHamiltonian";
if(!dump.is_group( path )) {
app_error()<<" ERROR: H5Group /Hamiltonian/SparseGeneralHamiltonian does not exists in restart file. \n";
return false;
}
if(!dump.push("Hamiltonian",false)) return false;
if(!dump.push("SparseGeneralHamiltonian",false)) return false;
std::vector<int> Idata(7);
if(!dump.read(Idata,"dims")) return false;
H1.resize(Idata[0]);
V2.resize(Idata[1]);
V2_2bar.resize(Idata[2]);
if(NMO < 0) NMO = Idata[3];
if(NAEA < 0) NAEA = Idata[4];
if(NAEB < 0) NAEB = Idata[5];
if(Idata[3] != NMO) {
app_error()<<" ERROR: NMO differs from value in integral file. \n";
return false;
}
if(Idata[4] != NAEA) {
app_error()<<" ERROR: NMO differs from value in integral file. \n";
return false;
}
if(Idata[5] != NAEB) {
app_error()<<" ERROR: NMO differs from value in integral file. \n";
return false;
}
spinRestricted = (Idata[6]==0)?(true):(false);
occup_alpha.resize(NAEA);
occup_beta.resize(NAEB);
Idata.resize(NAEA+NAEB);
if(!dump.read(Idata,"occups")) return false;
for(int i=0; i<NAEA; i++) occup_alpha[i] = Idata[i];
for(int i=NAEA, j=0; i<NAEA+NAEB; i++, j++) occup_beta[j] = Idata[i];
std::vector<double> Rdata(2);
if(!dump.read(Rdata,"Energies")) return false;
NuclearCoulombEnergy = Rdata[0];
FrozenCoreEnergy = Rdata[0];
int sz = std::max( 2*H1.size(), std::max( 4*V2.size(), 4*V2_2bar.size() ) );
std::vector<IndexType> ivec;
ivec.reserve(sz);
ivec.resize(2*H1.size());
if(!dump.read(ivec,"H1_indx")) return false;
for(int i=0, j=0; i<H1.size(); i++, j+=2)
H1[i] = std::make_tuple(ivec[j],ivec[j+1],0);
ivec.clear();
ivec.resize(4*V2.size());
if(!dump.read(ivec,"V2_indx")) return false;
for(int i=0, j=0; i<V2.size(); i++, j+=4)
V2[i] = std::make_tuple(ivec[j],ivec[j+1],ivec[j+2],ivec[j+3],0);
ivec.clear();
ivec.resize(4*V2_2bar.size());
if(!dump.read(ivec,"V2_2bar_indx")) return false;
for(int i=0, j=0; i<V2_2bar.size(); i++, j+=4)
V2_2bar[i] = std::make_tuple(ivec[j],ivec[j+1],ivec[j+2],ivec[j+3],0);
std::vector<IndexType>().swap(ivec);
sz = std::max( H1.size(), std::max( V2.size(), V2_2bar.size() ) );
std::vector<ValueType> vvec;
vvec.reserve(sz);
vvec.resize(H1.size());
if(!dump.read(vvec,"H1")) return false;
for(int i=0; i<H1.size(); i++)
std::get<2>(H1[i]) = vvec[i];
vvec.clear();
vvec.resize(V2.size());
if(!dump.read(vvec,"V2")) return false;
for(int i=0; i<V2.size(); i++)
std::get<4>(V2[i]) = vvec[i];
vvec.clear();
vvec.resize(V2_2bar.size());
if(!dump.read(vvec,"V2_2bar")) return false;
for(int i=0; i<V2_2bar.size(); i++)
std::get<4>(V2_2bar[i]) = vvec[i];
std::vector<ValueType>().swap(vvec);
dump.pop();
dump.pop();
dump.close();
return true;
*/
}
// Writes the projector to an HDF5 file. Currently does nothing.
void CCProjector::hdf_write() {
  // Disabled. The code that used to be here (commented out) was the HDF5 writer of
  // SparseGeneralHamiltonian: it created the file named by hdf_write_file, refused to
  // overwrite an existing /Hamiltonian/SparseGeneralHamiltonian group, and wrote the data
  // sets "dims", "occups", "Energies", "H1_indx", "V2_indx", "V2_2bar_indx", "H1", "V2" and
  // "V2_2bar". None of it referred to Pmat.
  // TODO: write Pmat once a file layout for the projector is defined.
}
// Placeholder: the body is empty, so Spvn is left untouched.
void CCProjector::calculateHSPotentials_SparseDiagonalization(ComplexSMSpMat& Spvn)
{
}
// Factorizes the projector matrix Pmat by dense diagonalization and stores the result in Spvn.
//
//  1. Pmat must be Hermitian; it is diagonalized (abort on failure of either step).
//  2. Every eigenvector is scaled by the (complex) square root of its eigenvalue.
//  3. Eigenvectors whose |eigenvalue| exceeds |eigcut| and that have at least one entry with
//     magnitude above cutoff_sparse are kept; each kept vector becomes one column of Spvn,
//     multiplied by i, with entries at or below cutoff_sparse dropped.
//  4. When test_breakup is set, Pmat is rebuilt from the scaled vectors and the deviations
//     are reported.
void CCProjector::calculateHSPotentials_Diagonalization(ComplexSMSpMat& Spvn)
{
  int myRank=0;
#if defined(USE_MPI)
  myRank = rank();
#endif
  const bool isRoot = (myRank==0);

  const int dim = na+nb;

  Timer.reset("Generic");
  Timer.start("Generic");

  std::cout<<"Pmat: \n" <<Pmat <<std::endl;

  const bool hermitian = DenseMatrixOperators::isHermitian(dim,Pmat.data(),dim);
  if(!hermitian) {
    app_error()<<" Found non-Hermitian matrix during diagonalization of cluster operator. CCProjector::calculateHSPotentials_Diagonalization(). " <<std::endl;
    APP_ABORT(" Found non-Hermitian matrix during diagonalization of cluster operator. CCProjector::calculateHSPotentials_Diagonalization(). \n");
  }

  ComplexMatrix vecs(dim);
  RealVector vals(dim);
  const bool solved = DenseMatrixOperators::symEigenSysAll(dim,Pmat.data(),dim,vals.data(),vecs.data(),dim);
  if(!solved) {
    app_error()<<"Problems with eigenvalue/eigenvector calculation in CCProjector::calculateHSPotentials_Diagonalization.\n";
    APP_ABORT("Problems with eigenvalue/eigenvector calculation in CCProjector::calculateHSPotentials_Diagonalization.\n");
  }

  Timer.stop("Generic");
  if(isRoot) app_log()<<" -- Time to solve eigenvalue problem: " <<Timer.average("Generic") <<"\n";

  // scale column c by sqrt(eigenvalue c); the root is complex for negative eigenvalues
  for(int c=0; c<dim; c++) {
    const ComplexType root = std::sqrt( ComplexType(vals[c]) );
    std::cout<<"eigval: " <<vals[c] <<" " <<root <<std::endl;
    for(int r=0; r<dim; r++)
      vecs(r,c) *= root;
  }

  // first pass: count the vectors and the entries that survive the cutoffs
  int numVectors=0;
  int numTerms=0;
  for(int c=0; c<dim; c++) {
    if( !(std::abs(vals[c]) > std::abs(eigcut)) ) continue;
    int kept=0;
    for(int r=0; r<dim; r++)
      if(std::abs(vecs(r,c)) > cutoff_sparse) kept++;
    if(kept > 0) {
      numVectors++;
      numTerms += kept;
    }
  }

  // later on, instead of doing all ~M^4 terms, choose a few thousand randomly
  if(test_breakup) {

    if(isRoot) app_log()<<" -- Testing Projector factorization. \n";
    Timer.reset("Generic");
    Timer.start("Generic");

    RealType errSum=0.0;
    RealType errMax=0.0;
    for(IndexType i=0; i<dim; i++) {
      for(IndexType j=0; j<dim; j++) {
        const ComplexType exact = Pmat(i,j);
        ComplexType rebuilt = 0.0;
        for(int n=0; n<dim; n++) rebuilt += vecs(i,n)*(vecs(j,n));
        const auto err = std::abs(exact-rebuilt);
        errSum+=err;
        if( errMax < err ) errMax = err;
        if( err > 10*eigcut ) {
          app_error()<<" Problems with Projector decomposition, i,j,P,Pc: "
                     <<i <<" "
                     <<j <<" "
                     <<exact <<" "
                     <<rebuilt <<std::endl;
        }
      }
    }
    app_log()<<"\n ********************************************\n Average error due to truncated eigenvalue factorization (in units of cutoff), max error : " <<errSum/dim/dim <<" " <<errMax <<" \n ********************************************\n"<<std::endl;

    Timer.stop("Generic");
    if(isRoot) app_log()<<" -- Time to test eigenvalue factorization: " <<Timer.average("Generic") <<"\n";
  }

  Spvn.setDims(dim,numVectors);
  Spvn.allocate_serial(numTerms);

  // second pass: store i * (scaled eigenvector) for every kept vector
  const ComplexType ifac = ComplexType(0.0,1.0);
  int col=0;
  for(int c=0; c<dim; c++) {
    if( !(std::abs(vals[c]) > std::abs(eigcut)) ) continue;
    int kept=0;
    for(int r=0; r<dim; r++) {
      const ComplexType val = ifac*vecs(r,c);
      if( !(std::abs(val) > cutoff_sparse) ) continue;
      kept++;
      // row index in Spvn: r*NMO+r, shifted down by NMO for r >= NMO
      const int row = (r>=NMO) ? (r*NMO+r-NMO) : (r*NMO+r);
      Spvn.add(row,col,val);
    }
    if(kept > 0)
      col++;
  }

  app_log()<<"Number of HS potentials in CCProjector: " <<Spvn.cols() <<std::endl;
  app_log()<<"Number of terms in sparse representation of HS potentials: " <<Spvn.size() <<std::endl;

  app_log()<<"Compressing Spvn. \n";
  Spvn.compress();
  app_log()<<"Done Compressing Spvn. \n";
}
// Fills Spvn with the Hubbard-Stratonovich potentials of the projector.
// Dense diagonalization is the only method used, whatever the value of use_eig.
void CCProjector::calculateHSPotentials(ComplexSMSpMat& Spvn)
{
  // if(use_eig)
  calculateHSPotentials_Diagonalization(Spvn);
}
// Reads the projector options from the XML node `cur`.
//
// Attribute:  name
// Parameters: cutoff_decomp / cutoff_decomposition / cutoff_factorization / cutoff_cholesky
//                 (all four set eigcut), cutoff_sparse, filetype, filename, hdf_write_file,
//             test_breakup, useCholesky, paral_fac / parallel_fac / parallel_factorization
//                 (yes/no or true/false, case-insensitive)
//
// Returns false only when `cur` is NULL.
bool CCProjector::parse(xmlNodePtr cur)
{
  if(cur == NULL)
    return false;

  const xmlNodePtr root=cur;

  OhmmsAttributeSet attribs;
  attribs.add(name,"name");
  attribs.put(cur);

  std::string breakupOpt("no");
  std::string choleskyOpt("no");
  std::string parallelOpt("no");

  ParameterSet params;
  params.add(eigcut,"cutoff_decomp","double");
  params.add(eigcut,"cutoff_decomposition","double");
  params.add(eigcut,"cutoff_factorization","double");
  params.add(eigcut,"cutoff_cholesky","double");
  params.add(cutoff_sparse,"cutoff_sparse","double");
  params.add(filetype,"filetype","std::string");
  params.add(filename,"filename","std::string");
  params.add(hdf_write_file,"hdf_write_file","std::string");
  params.add(breakupOpt,"test_breakup","std::string");
  params.add(choleskyOpt,"useCholesky","std::string");
  params.add(parallelOpt,"paral_fac","std::string");
  params.add(parallelOpt,"parallel_fac","std::string");
  params.add(parallelOpt,"parallel_factorization","std::string");
  params.put(cur);

  // small local helpers for the yes/no options
  auto lower = [](std::string& s) {
    std::transform(s.begin(),s.end(),s.begin(),(int (*)(int)) tolower);
  };
  auto isYes = [](const std::string& s) {
    return s == "yes" || s == "true";
  };

  lower(parallelOpt);
  lower(choleskyOpt);
  lower(filetype);
  lower(breakupOpt);

  // these flags can only be switched on from the input
  if(isYes(parallelOpt)) parallel_factorization = true;
  if(isYes(breakupOpt)) test_breakup = true;

  // diagonalization is the default; useCholesky switches it off
  use_eig = !isYes(choleskyOpt);

  if(use_eig)
    app_log()<<"Calculating factorization of 2 body interaction with direct diagonalization.\n";
  else
    app_log()<<"Calculating factorization of 2 body interaction with Cholesky method.\n";

  if(parallel_factorization)
    app_log()<<"Calculating factorization of 2-bofy hamiltonian in parallel. \n";

  // walk the child nodes; no child element is handled yet
  for(cur = root->children; cur != NULL; cur = cur->next) {
    std::string cname((const char*)(cur->name));
    if(cname =="something") {
    }
  }

  return true;
}
// do a general check of parameters for consistency
// make sure the object was built consistently and correctly
bool CCProjector::checkObject()
{
  // no checks are implemented yet: every object is reported as consistent
  const bool consistent = true;
  return consistent;
}
}

View File

@ -0,0 +1,75 @@
#ifndef QMCPLUSPLUS_AFQMC_CCPROJECTOR_H
#define QMCPLUSPLUS_AFQMC_CCPROJECTOR_H
#include<iostream>
#include<vector>
#include<map>
#include<fstream>
#include<Message/MPIObjectBase.h>
#include "OhmmsData/libxmldefs.h"
#include"AFQMC/config.h"
namespace qmcplusplus
{
// Projector of the form sum_ij P(i,j) n_i n_j, stored as the dense matrix Pmat and factorized
// into Hubbard-Stratonovich potentials by calculateHSPotentials().
class CCProjector: public ProjectorBase
{

  typedef HamiltonianBase* HamPtr;

  public:

  CCProjector(Communicate *c):ProjectorBase(c)
  {
  }

  ~CCProjector() {}

  // Fills the sparse matrix with the factorized projector.
  void calculateHSPotentials(ComplexSMSpMat&);

  void calculateHSPotentials_SparseDiagonalization(ComplexSMSpMat&);

  void calculateHSPotentials_Diagonalization(ComplexSMSpMat&);

  // parse xml input node
  bool parse(xmlNodePtr cur);

  // check object
  bool checkObject();

  // Records whether this rank is the head of its node and the communicator of the node heads.
  void setHeadComm(bool hd, MPI_Comm comm) {
    head_of_nodes=hd;
    MPI_COMM_HEAD_OF_NODES = comm;
  }

  void hdf_write();

  protected:

  bool parallel_factorization=false;
  bool use_eig=true;
  bool symmetric=false;

  // Dimensions of the two spin blocks of Pmat; set in initFromGuess().
  int na=0;
  int nb=0;

  // sum_ij P(i,j) n_i n_j , where n_k = c+_k c_k
  // P(i,i) == 0, P(i,j)==P(j,i)
  ComplexMatrix Pmat;

  // Reading from ASCII or XML files is not implemented: both report failure.
  bool initFromASCII(const std::string& fileName) { return false; }

  bool initFromXML(const std::string& fileName) { return false; }

  bool initFromHDF5(const std::string& fileName);

  bool initFromGuess();

};
}
#endif

View File

@ -0,0 +1,436 @@
#include<cstdlib>
#include<algorithm>
#include<complex>
#include<iostream>
#include<fstream>
#include<map>
#include<utility>
#if defined(USE_MPI)
#include<mpi.h>
#endif
#include "OhmmsData/libxmldefs.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "Utilities/SimpleParser.h"
#include "Configuration.h"
#include "io/hdf_archive.h"
#include "Message/CommOperators.h"
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/sync/interprocess_condition.hpp>
#include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include "AFQMC/config.h"
#include "AFQMC/Hamiltonians/ProjectorBase.h"
#include "AFQMC/Hamiltonians/DDProjector.h"
#include "AFQMC/Numerics/DenseMatrixOperations.h"
#include "AFQMC/Numerics/SparseMatrixOperations.h"
namespace qmcplusplus
{
// Default initialization, selected by ProjectorBase::init() when filetype is
// none of "ascii", "xml" or "hdf5": sizes Pmat to (2*NMO) x (2*NMO) and sets
// every element to zero.
// Returns true. The original had no return statement, so the value handed
// back to ProjectorBase::init() was undefined.
bool DDProjector::initFromGuess()
{
  // start by just setting to zero
  Pmat.resize(2*NMO,2*NMO);
  Pmat=0;
  return true;
}
bool DDProjector::initFromHDF5(const std::string& fileName)
{
/*
hdf_archive dump(myComm);
if(!dump.open(fileName,H5F_ACC_RDONLY,false)) {
app_error()<<" Error opening integral file in SparseGeneralHamiltonian. \n";
return false;
}
std::string path = "/Hamiltonian/SparseGeneralHamiltonian";
if(!dump.is_group( path )) {
app_error()<<" ERROR: H5Group /Hamiltonian/SparseGeneralHamiltonian does not exists in restart file. \n";
return false;
}
if(!dump.push("Hamiltonian",false)) return false;
if(!dump.push("SparseGeneralHamiltonian",false)) return false;
std::vector<int> Idata(7);
if(!dump.read(Idata,"dims")) return false;
H1.resize(Idata[0]);
V2.resize(Idata[1]);
V2_2bar.resize(Idata[2]);
if(NMO < 0) NMO = Idata[3];
if(NAEA < 0) NAEA = Idata[4];
if(NAEB < 0) NAEB = Idata[5];
if(Idata[3] != NMO) {
app_error()<<" ERROR: NMO differs from value in integral file. \n";
return false;
}
if(Idata[4] != NAEA) {
app_error()<<" ERROR: NMO differs from value in integral file. \n";
return false;
}
if(Idata[5] != NAEB) {
app_error()<<" ERROR: NMO differs from value in integral file. \n";
return false;
}
spinRestricted = (Idata[6]==0)?(true):(false);
occup_alpha.resize(NAEA);
occup_beta.resize(NAEB);
Idata.resize(NAEA+NAEB);
if(!dump.read(Idata,"occups")) return false;
for(int i=0; i<NAEA; i++) occup_alpha[i] = Idata[i];
for(int i=NAEA, j=0; i<NAEA+NAEB; i++, j++) occup_beta[j] = Idata[i];
std::vector<double> Rdata(2);
if(!dump.read(Rdata,"Energies")) return false;
NuclearCoulombEnergy = Rdata[0];
FrozenCoreEnergy = Rdata[0];
int sz = std::max( 2*H1.size(), std::max( 4*V2.size(), 4*V2_2bar.size() ) );
std::vector<IndexType> ivec;
ivec.reserve(sz);
ivec.resize(2*H1.size());
if(!dump.read(ivec,"H1_indx")) return false;
for(int i=0, j=0; i<H1.size(); i++, j+=2)
H1[i] = std::make_tuple(ivec[j],ivec[j+1],0);
ivec.clear();
ivec.resize(4*V2.size());
if(!dump.read(ivec,"V2_indx")) return false;
for(int i=0, j=0; i<V2.size(); i++, j+=4)
V2[i] = std::make_tuple(ivec[j],ivec[j+1],ivec[j+2],ivec[j+3],0);
ivec.clear();
ivec.resize(4*V2_2bar.size());
if(!dump.read(ivec,"V2_2bar_indx")) return false;
for(int i=0, j=0; i<V2_2bar.size(); i++, j+=4)
V2_2bar[i] = std::make_tuple(ivec[j],ivec[j+1],ivec[j+2],ivec[j+3],0);
std::vector<IndexType>().swap(ivec);
sz = std::max( H1.size(), std::max( V2.size(), V2_2bar.size() ) );
std::vector<ValueType> vvec;
vvec.reserve(sz);
vvec.resize(H1.size());
if(!dump.read(vvec,"H1")) return false;
for(int i=0; i<H1.size(); i++)
std::get<2>(H1[i]) = vvec[i];
vvec.clear();
vvec.resize(V2.size());
if(!dump.read(vvec,"V2")) return false;
for(int i=0; i<V2.size(); i++)
std::get<4>(V2[i]) = vvec[i];
vvec.clear();
vvec.resize(V2_2bar.size());
if(!dump.read(vvec,"V2_2bar")) return false;
for(int i=0; i<V2_2bar.size(); i++)
std::get<4>(V2_2bar[i]) = vvec[i];
std::vector<ValueType>().swap(vvec);
dump.pop();
dump.pop();
dump.close();
return true;
*/
}
// Writes the projector to an HDF5 restart file. NOT IMPLEMENTED: the body is
// entirely commented out, so calling this function does nothing.
// The disabled draft below writes to "/Hamiltonian/SparseGeneralHamiltonian"
// and uses H1, V2 and V2_2bar, which are declared in SparseGeneralHamiltonian
// rather than in DDProjector/ProjectorBase as visible here; it is kept only
// as a starting point for a real implementation.
void DDProjector::hdf_write() {
/*
if(hdf_write_file == std::string("")) return;
hdf_archive dump(myComm);
if(!dump.create(hdf_write_file)) {
app_error()<<" Error opening restart file in SparseGeneralHamiltonian. \n";
return;
}
std::string path = "/Hamiltonian/SparseGeneralHamiltonian";
if(dump.is_group( path )) {
app_error()<<" ERROR: H5Group /Hamiltonian/SparseGeneralHamiltonian already exists in restart file. Not over-writing data in file. \n";
return;
}
dump.push("Hamiltonian");
dump.push("SparseGeneralHamiltonian");
std::vector<int> Idata(7);
Idata[0]=H1.size();
Idata[1]=V2.size();
Idata[2]=V2_2bar.size();
Idata[3]=NMO;
Idata[4]=NAEA;
Idata[5]=NAEB;
Idata[6]=spinRestricted?(0):(1);
dump.write(Idata,"dims");
Idata.resize(NAEA+NAEB);
for(int i=0; i<NAEA; i++) Idata[i] = occup_alpha[i];
for(int i=NAEA, j=0; i<NAEA+NAEB; i++, j++) Idata[i] = occup_beta[j];
dump.write(Idata,"occups");
std::vector<double> Rdata(2);
Rdata[0] = NuclearCoulombEnergy;
Rdata[1] = FrozenCoreEnergy;
dump.write(Rdata,"Energies");
int sz = std::max( 2*H1.size(), std::max( 4*V2.size(), 4*V2_2bar.size() ) );
std::vector<IndexType> ivec;
ivec.reserve(sz);
ivec.resize(2*H1.size());
for(int i=0, j=0; i<H1.size(); i++, j+=2)
std::tie (ivec[j],ivec[j+1],std::ignore) = H1[i];
dump.write(ivec,"H1_indx");
ivec.clear();
ivec.resize(4*V2.size());
for(int i=0, j=0; i<V2.size(); i++, j+=4)
std::tie (ivec[j],ivec[j+1],ivec[j+2],ivec[j+3],std::ignore) = V2[i];
dump.write(ivec,"V2_indx");
ivec.clear();
ivec.resize(4*V2_2bar.size());
for(int i=0, j=0; i<V2_2bar.size(); i++, j+=4)
std::tie (ivec[j],ivec[j+1],ivec[j+2],ivec[j+3],std::ignore) = V2_2bar[i];
dump.write(ivec,"V2_2bar_indx");
std::vector<IndexType>().swap(ivec);
sz = std::max( H1.size(), std::max( V2.size(), V2_2bar.size() ) );
std::vector<ValueType> vvec;
vvec.reserve(sz);
vvec.resize(H1.size());
for(int i=0; i<H1.size(); i++)
std::tie (std::ignore,std::ignore,vvec[i]) = H1[i];
dump.write(vvec,"H1");
vvec.clear();
vvec.resize(V2.size());
for(int i=0; i<V2.size(); i++)
std::tie (std::ignore,std::ignore,std::ignore,std::ignore,vvec[i]) = V2[i];
dump.write(vvec,"V2");
vvec.clear();
vvec.resize(V2_2bar.size());
for(int i=0; i<V2_2bar.size(); i++)
std::tie (std::ignore,std::ignore,std::ignore,std::ignore,vvec[i]) = V2_2bar[i];
dump.write(vvec,"V2_2bar");
std::vector<ValueType>().swap(vvec);
dump.pop();
dump.pop();
dump.flush();
dump.close();
*/
}
// Sparse-diagonalization variant of the HS-potential construction.
// Not implemented: the body is empty and Spvn is left untouched.
void DDProjector::calculateHSPotentials_SparseDiagonalization(ComplexSMSpMat& Spvn)
{
}
// Builds the sparse HS potentials Spvn from a full eigen-decomposition of
// Pmat (dimension 2*NMO).
//
// Steps:
//   1. Solve the symmetric eigenproblem for Pmat; abort on failure.
//   2. Scale eigenvector column i by sqrt(max(0, eigVal[i])).
//   3. Count the columns with eigVal[i] > |eigcut| that have at least one
//      entry larger than cutoff_sparse (cnt1), and the number of such
//      entries (cnt2), to size Spvn.
//   4. If test_breakup is set, rebuild Pmat from the scaled eigenvectors and
//      report the average and maximum deviation.
//   5. Store i*eigVec(j,n) at row jk of Spvn, where jk is the position of
//      the diagonal element (j,j) in the spin-blocked NMO*NMO layout
//      (alpha block first, then the beta block offset by NMO*NMO).
//
// Fix: the row dimension of Spvn is now 2*NMO*NMO. The original declared
// only 2*NMO rows although the row index jk reaches 2*NMO*NMO-1.
//
// NOTE(review): step 4 reads Pmat after Pmat.data() was handed to the
// eigensolver - confirm the solver does not overwrite its input.
void DDProjector::calculateHSPotentials_Diagonalization(ComplexSMSpMat& Spvn)
{
  int rnk=0;
#if defined(USE_MPI)
  rnk = rank();
#endif

  int NMO2 = 2*NMO;

  Timer.reset("Generic");
  Timer.start("Generic");

  ValueMatrix eigVec(NMO2);
  RealVector eigVal(NMO2);
  if(!DenseMatrixOperators::symEigenSysAll(NMO2,Pmat.data(),NMO2,eigVal.data(),eigVec.data(),NMO2) ) {
    app_error()<<"Problems with eigenvalue/eigenvector calculation in DDProjector::calculateHSPotentials_Diagonalization.\n";
    APP_ABORT("Problems with eigenvalue/eigenvector calculation in DDProjector::calculateHSPotentials_Diagonalization.\n");
  }

  Timer.stop("Generic");
  if(rnk==0) app_log()<<" -- Time to solve eigenvalue problem: " <<Timer.average("Generic") <<"\n";

  // Scale each eigenvector by the square root of its eigenvalue; negative
  // eigenvalues are clamped to zero, which zeroes the whole column.
  for(int i=0; i<NMO2; i++) {
    RealType scl = std::sqrt( std::max(0.0,eigVal[i]) ) ;
    for(int j=0; j<NMO2; j++)
      eigVec(j,i) *= scl;
  }

  // First pass: count retained columns (cnt1) and non-negligible entries (cnt2).
  int cnt1=0;
  int cnt2=0;
  for(int i=0; i<NMO2; i++) {
    if(eigVal[i] > std::abs(eigcut)) {
      int cnt3=0;
      for(int j=0; j<NMO2; j++)
        if(std::abs(eigVec(j,i)) > cutoff_sparse) cnt3++;
      if(cnt3 > 0) {
        cnt1++;
        cnt2 += cnt3;
      }
    }
  }

  // later on, instead of doing all ~M^4 terms, choose a few thousand randomly
  if(test_breakup) {

    if(rnk==0) app_log()<<" -- Testing Projector factorization. \n";
    Timer.reset("Generic");
    Timer.start("Generic");

    RealType s=0.0;
    RealType max=0.0;
    for(IndexType i=0; i<2*NMO; i++) {
      for(IndexType j=0; j<2*NMO; j++) {
        ValueType v2 = Pmat(i,j);
        // reconstruction sum_n L(i,n)*conj(L(j,n)) from the scaled eigenvectors
        ValueType v2c = 0.0;
        for(int n=0; n<NMO2; n++) v2c += eigVec(i,n)*myconj(eigVec(j,n));
        s+=std::abs(v2-v2c);
        if( max < std::abs(v2-v2c) ) max = std::abs(v2-v2c);
        if( std::abs(v2-v2c) > 10*eigcut ) {
          app_error()<<" Problems with Projector decomposition, i,j,P,Pc: "
                     <<i <<" "
                     <<j <<" "
                     <<v2 <<" "
                     <<v2c <<std::endl;
        }
      }
    }
    app_log()<<"\n ********************************************\n Average error due to truncated eigenvalue factorization (in units of cutoff), max error : " <<s/eigcut/NMO/NMO/4.0 <<" " <<max <<" \n ********************************************\n"<<std::endl;

    Timer.stop("Generic");
    if(rnk==0) app_log()<<" -- Time to test eigenvalue factorization: " <<Timer.average("Generic") <<"\n";
  }

  // Rows index the spin-blocked one-body matrix elements (2*NMO*NMO of
  // them); columns index the retained eigenvectors.
  Spvn.setDims(2*NMO*NMO,cnt1);
  Spvn.allocate_serial(cnt2);

  ComplexType ifac = ComplexType(0.0,1.0);

  // Second pass: fill Spvn. cnt1 is reused as the running column index and
  // only advances for columns that received at least one entry, matching
  // the counting pass above.
  cnt1=0;
  for(int i=0; i<NMO2; i++) {
    if(eigVal[i] > std::abs(eigcut)) {
      int cnt3=0;
      for(int j=0; j<2*NMO; j++) {
        if(std::abs(ifac*eigVec(j,i)) > cutoff_sparse) {
          cnt3++;
          // diagonal element (j,j): j*NMO+j in the alpha block,
          // NMO*NMO+(j-NMO)*NMO+(j-NMO) == j*NMO+j-NMO in the beta block
          int jk = j*NMO+j;
          if(j>=NMO) jk-=NMO;
          Spvn.add(jk,cnt1,ifac*eigVec(j,i));
        }
      }
      if(cnt3 > 0)
        cnt1++;
    }
  }

  app_log()<<"Number of HS potentials in DDProjector: " <<Spvn.cols() <<std::endl;
  app_log()<<"Number of terms in sparse representation of HS potentials: " <<Spvn.size() <<std::endl;

  app_log()<<"Compressing Spvn. \n";
  Spvn.compress();
  app_log()<<"Done Compressing Spvn. \n";

}
// Entry point for building the HS potentials into Spvn.
// Always delegates to the dense-diagonalization implementation: the use_eig
// switch is commented out, so no other method is selectable here.
void DDProjector::calculateHSPotentials(ComplexSMSpMat& Spvn)
{
//if(use_eig)
calculateHSPotentials_Diagonalization(Spvn);
}
// Reads the projector settings from an XML node.
//
// The "name" attribute is read into name. The parameters cover the
// factorization cutoff (four aliases that all write eigcut), cutoff_sparse,
// the input file type and name, the HDF5 output file, and three yes/true
// switches: test_breakup, useCholesky and parallel factorization (three
// aliases). Switch values and filetype are lower-cased before use.
//
// Returns false for a NULL node, true otherwise.
bool DDProjector::parse(xmlNodePtr cur)
{
  if(cur == NULL) return false;

  xmlNodePtr root = cur;

  OhmmsAttributeSet attribs;
  attribs.add(name,"name");
  attribs.put(cur);

  // textual switches, all "no" unless overridden by the input
  std::string breakupFlag("no");
  std::string choleskyFlag("no");
  std::string parallelFlag("no");

  ParameterSet params;
  params.add(eigcut,"cutoff_decomp","double");
  params.add(eigcut,"cutoff_decomposition","double");
  params.add(eigcut,"cutoff_factorization","double");
  params.add(eigcut,"cutoff_cholesky","double");
  params.add(cutoff_sparse,"cutoff_sparse","double");
  params.add(filetype,"filetype","std::string");
  params.add(filename,"filename","std::string");
  params.add(hdf_write_file,"hdf_write_file","std::string");
  params.add(breakupFlag,"test_breakup","std::string");
  params.add(choleskyFlag,"useCholesky","std::string");
  params.add(parallelFlag,"paral_fac","std::string");
  params.add(parallelFlag,"parallel_fac","std::string");
  params.add(parallelFlag,"parallel_factorization","std::string");
  params.put(cur);

  // in-place lower-casing and the yes/true test shared by all switches
  auto lowercase = [](std::string& s) {
    std::transform(s.begin(),s.end(),s.begin(),(int (*)(int)) tolower);
  };
  auto affirmative = [](const std::string& s) {
    return s == "yes" || s == "true";
  };

  // the switches are only ever turned on here, never reset to false
  lowercase(parallelFlag);
  if(affirmative(parallelFlag)) parallel_factorization = true;

  // asking for Cholesky disables the direct diagonalization
  lowercase(choleskyFlag);
  use_eig = !affirmative(choleskyFlag);

  lowercase(filetype);

  lowercase(breakupFlag);
  if(affirmative(breakupFlag)) test_breakup = true;

  if(use_eig) {
    app_log()<<"Calculating factorization of 2 body interaction with direct diagonalization.\n";
  } else {
    app_log()<<"Calculating factorization of 2 body interaction with Cholesky method.\n";
  }

  if(parallel_factorization) {
    app_log()<<"Calculating factorization of 2-bofy hamiltonian in parallel. \n";
  }

  // placeholder walk over the child nodes; no child is acted upon yet
  for(xmlNodePtr child = root->children; child != NULL; child = child->next) {
    std::string cname((const char*)(child->name));
    if(cname =="something") {
    }
  }

  return true;
}
// General consistency check of the parsed parameters, meant to verify that
// the object was built consistently and correctly.
// Currently a stub: nothing is validated and true is always returned.
bool DDProjector::checkObject()
{
return true;
}
}

View File

@ -0,0 +1,71 @@
#ifndef QMCPLUSPLUS_AFQMC_DDPROJECTOR_H
#define QMCPLUSPLUS_AFQMC_DDPROJECTOR_H
#include<iostream>
#include<vector>
#include<map>
#include<fstream>
#include<Message/MPIObjectBase.h>
#include "OhmmsData/libxmldefs.h"
#include"AFQMC/config.h"
namespace qmcplusplus
{
// Projector of the form sum_ij P(i,j) n_i n_j (see the comment on Pmat).
// It implements the ProjectorBase interface (HS-potential generation, XML
// parsing, consistency check, HDF5 output). ASCII and XML initialization
// are not implemented.
class DDProjector: public ProjectorBase
{

  typedef HamiltonianBase* HamPtr;

  public:

  // Forwards the communicator to ProjectorBase; no other setup is done here.
  DDProjector(Communicate *c):ProjectorBase(c)
  {
  }

  ~DDProjector() {}

  // Fill the sparse matrix passed by reference with the HS potentials.
  void calculateHSPotentials(ComplexSMSpMat&);

  void calculateHSPotentials_SparseDiagonalization(ComplexSMSpMat&);

  void calculateHSPotentials_Diagonalization(ComplexSMSpMat&);

  // parse xml input node
  bool parse(xmlNodePtr cur);

  // check object
  bool checkObject();

  // Record whether this rank is a head-of-node rank and store the
  // communicator that connects the heads of nodes.
  void setHeadComm(bool hd, MPI_Comm comm) {
    head_of_nodes=hd;
    MPI_COMM_HEAD_OF_NODES = comm;
  }

  void hdf_write();

  protected:

  bool parallel_factorization=false;
  bool use_eig=true;

  // sum_ij P(i,j) n_i n_j , where n_k = c+_k c_k
  // P(i,j) == 0, P(i,j)==P(j,i)
  // NOTE(review): "P(i,j) == 0" presumably refers to the diagonal P(i,i) - confirm.
  ValueMatrix Pmat;

  // Not implemented. These used to have empty bodies, i.e. a bool function
  // flowing off its end (undefined behavior). They now report failure so
  // that ProjectorBase::init() returns false for these file types.
  bool initFromASCII(const std::string& fileName) { return false; }

  bool initFromXML(const std::string& fileName) { return false; }

  bool initFromHDF5(const std::string& fileName);

  bool initFromGuess();

};
}
#endif

View File

@ -0,0 +1,123 @@
#ifndef QMCPLUSPLUS_AFQMC_HAMILTONIANBASE_H
#define QMCPLUSPLUS_AFQMC_HAMILTONIANBASE_H
#include<iostream>
#include<vector>
#include<map>
#include<fstream>
#include<Message/MPIObjectBase.h>
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/Utilities/taskgroup.h"
#include"AFQMC/config.h"
namespace qmcplusplus
{
class HamiltonianBase: public MPIObjectBase, public AFQMCInfo
{
public:
HamiltonianBase(Communicate *c):MPIObjectBase(c),TG(c,"HamiltonianTG"),name(""),filetype("undefined"),filename("undefined"),test_breakup(false),head_of_nodes(false),distribute_Ham(false),min_i(0),max_i(0),nnodes_per_TG(1)
{
FrozenCoreEnergy = NuclearCoulombEnergy = ValueType(0.0);
}
~HamiltonianBase() {}
inline int getNMO_FULL() {return NMO_FULL;}
inline int getNAEA() { return NAEA;}
inline int getNAEB() { return NAEB;}
inline int getNCA() { return NCA;}
inline int getNCB() { return NCB;}
inline bool RHF() {return spinRestricted;}
bool init(std::vector<int>& TGdata, ComplexSMVector* TGbuff, MPI_Comm tg_comm, MPI_Comm node_comm )
{
if(nnodes_per_TG > 1) distribute_Ham=true;
if(!TG.quick_setup(TGdata[4],nnodes_per_TG,TGdata[0],TGdata[1],TGdata[2],TGdata[3]))
return false;
TG.setBuffer(TGbuff);
TG.setNodeCommLocal(node_comm);
TG.setTGCommLocal(tg_comm);
if(filetype == "fcidump" || filetype == "ascii")
return initFromASCII(filename);
else if(filetype == "xml")
return initFromXML(filename);
else if(filetype == "hdf5")
return initFromHDF5(filename);
else {
app_error()<<"Unknown filetype in HamiltonianBase::init(): " <<filetype <<std::endl;
return false;
}
}
virtual void calculateHSPotentials(const RealType cut, const RealType dt, ComplexSMSpMat&, TaskGroup& TGprop, std::vector<int>& nvec_per_node, bool paral )=0;
virtual void calculateHSPotentials_Diagonalization(const RealType cut, const RealType dt, ComplexSMSpMat&, TaskGroup& TGprop, std::vector<int>& nvec_per_node, bool paral)=0;
virtual void calculateOneBodyPropagator(const RealType cut, const RealType dt, ComplexMatrix& Hadd, std::vector<s2D<ComplexType> >& Pkin)=0;
virtual bool generateFullHamiltonianForME()=0;
virtual bool getFullHam(std::vector<s1D<ValueType> >*& h, ComplexSMSpMat*& v)=0;
// parse xml input node
virtual bool parse(xmlNodePtr cur)=0;
// check object
virtual bool checkObject()=0;
// keep a copy of Reference State in FCIDUMP
ComplexMatrix RefWFn;
// name of the object
std::string name;
// nuclear coulomb term
ValueType NuclearCoulombEnergy;
ValueType FrozenCoreEnergy;
// timestep
RealType dt;
void setHeadComm(bool hd, MPI_Comm comm) {
head_of_nodes=hd;
MPI_COMM_HEAD_OF_NODES = comm;
}
// void setTGComm(bool hd, MPI_Comm comm) {
// head_of_local_tg=hd;
// MPI_COMM_LOCAL_TG = comm;
// }
protected:
// for hamiltonian distribution
TaskGroup TG;
int nnodes_per_TG;
std::string filetype;
std::string filename;
bool test_breakup;
bool head_of_nodes;
MPI_Comm MPI_COMM_HEAD_OF_NODES;
bool distribute_Ham; // implement assuming factorized Ham first
int min_i, max_i;
virtual bool initFromASCII(const std::string& fileName)=0;
virtual bool initFromXML(const std::string& fileName)=0;
virtual bool initFromHDF5(const std::string& fileName)=0;
};
}
#endif

View File

@ -0,0 +1,92 @@
#ifndef QMCPLUSPLUS_AFQMC_PROJECTORBASE_H
#define QMCPLUSPLUS_AFQMC_PROJECTORBASE_H
#include<iostream>
#include<vector>
#include<map>
#include<fstream>
#include<Message/MPIObjectBase.h>
#include "OhmmsData/libxmldefs.h"
#include"AFQMC/config.h"
#include"AFQMC/Hamiltonians/HamiltonianBase.h"
namespace qmcplusplus
{
// Abstract base class for AFQMC projectors.
//
// It stores the Hamiltonian the projector is attached to, the sparsity and
// eigenvalue cutoffs (both default to 1e-5), and the input/output file
// settings. init() picks the initializer that matches filetype; the
// HS-potential construction, parsing, checking and HDF5 output are left to
// derived classes.
class ProjectorBase: public MPIObjectBase, public AFQMCInfo
{

  typedef HamiltonianBase* HamPtr;

  public:

  // Member initializers are listed in declaration order, which is the order
  // in which they actually run. ham0 starts as a null pointer instead of
  // being left indeterminate until init() is called.
  ProjectorBase(Communicate *c):
    MPIObjectBase(c),
    name(""),
    ham0(nullptr),
    cutoff_sparse(1e-5),
    eigcut(1e-5),
    filetype("undefined"),
    filename("undefined"),
    test_breakup(false),
    head_of_nodes(false)
  {
  }

  // Declared virtual explicitly: this is a polymorphic base, so destruction
  // through a ProjectorBase pointer must reach the derived destructor.
  virtual ~ProjectorBase() {}

  // Stores the Hamiltonian pointer and initializes the projector according
  // to filetype: "ascii", "xml" and "hdf5" select the matching reader, and
  // any other value falls back to initFromGuess().
  // Returns the result of the selected initializer.
  bool init(HamPtr h)
  {
    ham0 = h;
    if(filetype == "ascii")
      return initFromASCII(filename);
    else if(filetype == "xml")
      return initFromXML(filename);
    else if(filetype == "hdf5")
      return initFromHDF5(filename);
    else
      return initFromGuess();
  }

  virtual void calculateHSPotentials(ComplexSMSpMat&)=0;

  virtual void calculateHSPotentials_SparseDiagonalization(ComplexSMSpMat&)=0;

  virtual void calculateHSPotentials_Diagonalization(ComplexSMSpMat&)=0;

  // parse xml input node
  virtual bool parse(xmlNodePtr cur)=0;

  // check object
  virtual bool checkObject()=0;

  // name of the object
  std::string name;

  // Record whether this rank is a head-of-node rank and store the
  // communicator that connects the heads of nodes.
  void setHeadComm(bool hd, MPI_Comm comm) {
    head_of_nodes=hd;
    MPI_COMM_HEAD_OF_NODES = comm;
  }

  virtual void hdf_write()=0;

  protected:

  // Hamiltonian passed to init(); null until then
  HamPtr ham0;

  std::string hdf_write_file;

  RealType cutoff_sparse;

  RealType eigcut;

  std::string filetype;
  std::string filename;

  bool test_breakup;

  bool head_of_nodes;
  // only assigned by setHeadComm(); not set by the constructor
  MPI_Comm MPI_COMM_HEAD_OF_NODES;

  // initializers, supplied by the derived class
  virtual bool initFromASCII(const std::string& fileName)=0;

  virtual bool initFromXML(const std::string& fileName)=0;

  virtual bool initFromHDF5(const std::string& fileName)=0;

  virtual bool initFromGuess()=0;

};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,560 @@
#ifndef QMCPLUSPLUS_AFQMC_SPARSEGENERALHAMILTONIAN_H
#define QMCPLUSPLUS_AFQMC_SPARSEGENERALHAMILTONIAN_H
#include<iostream>
#include<vector>
#include<map>
#include<fstream>
#include<Message/MPIObjectBase.h>
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/config.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Numerics/SparseMatrixOperations.h"
namespace qmcplusplus
{
class SparseGeneralHamiltonian: public HamiltonianBase
{
// template<typename spT> using ShmemAllocator = boost::interprocess::allocator<spT, boost::interprocess::managed_shared_memory::segment_manager>;
// template<typename spT> using SMVector = boost::interprocess::vector<spT, ShmemAllocator<spT>>;
typedef std::vector<s1D<ValueType> >::iterator s1Dit;
typedef std::vector<s2D<ValueType> >::iterator s2Dit;
typedef SMDenseVector<s4D<ValueType> >::iterator s4Dit;
public:
// Sets the default options of the Hamiltonian. The initializers are written
// in member declaration order, which is the order in which they run
// regardless of how the list is written.
SparseGeneralHamiltonian(Communicate *c):
  HamiltonianBase(c),
  hdf_write_file(""),
  ascii_write_file(""),
  NMAX(-1),
  test_2eint(false),
  zero_bad_diag_2eints(false),
  test_algo(true),
  v2full_transposed(false),
  rotation(""),
  factorizedHamiltonian(false),
  has_hamiltonian_for_selCI(false),
  has_full_hamiltonian_for_matrix_elements(false),
  orderStates(false),
  printEig(false),
  cutoff1bar(1e-8),
  cutoff2bar(1e-8),
  cutoff_cholesky(1e-6)
{
}

// Nothing to release explicitly.
~SparseGeneralHamiltonian() {}
void calculateHSPotentials(const RealType cut, const RealType dt, ComplexSMSpMat& Spvn, TaskGroup& TGprop, std::vector<int>& nvec_per_node, bool paral);
void calculateHSPotentials_Diagonalization(const RealType cut, const RealType dt, ComplexSMSpMat& Spvn, TaskGroup& TGprop, std::vector<int>& nvec_per_node, bool paral);
void calculateHSPotentials_FactorizedHam(const RealType cut, const RealType dt, ComplexSMSpMat& Spvn, TaskGroup& TGprop, std::vector<int>& nvec_per_node, bool paral);
// void calculateHSPotentials_Diagonalization_old(const RealType cut, const RealType dt, ComplexSMSpMat& Spvn);
void calculateOneBodyPropagator(const RealType cut, const RealType dt, ComplexMatrix& Hadd, std::vector<s2D<ComplexType> >& Pkin);
bool parse(xmlNodePtr cur);
// do a general check of parameters for consistency
// make sure object was build consistently and correctly
bool checkObject();
bool createHamiltonianForPureDeterminant(std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b ,std::vector<s1D<ValueType> >&, std::vector<s2D<ValueType> >& , const RealType cut=1e-6, bool closed_shell=false);
bool createHamiltonianForPureDeterminant(std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b , std::vector<s1D<ValueType> >& , ComplexSpMat&, const RealType cut=1e-6, bool closed_shell=false);
bool createHamiltonianForPureDeterminant(std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b , std::vector<s1D<ValueType> >& , ComplexSMSpMat&, const RealType cut=1e-6, bool closed_shell=false);
bool createHamiltonianForGeneralDeterminant(int type, const ComplexMatrix& A,std::vector<s1D<ComplexType> >& hij, ComplexSMSpMat& Vabkl, const RealType cut=1e-6);
inline bool generateFullHamiltonianForME() {
std::map<IndexType,bool> all_alpha,all_beta;
all_alpha.clear();
all_beta.clear();
for(IndexType i=0; i<NMO; i++) all_alpha[i]=true;
for(IndexType i=NMO; i<2*NMO; i++) all_alpha[i]=false;
for(IndexType i=0; i<NMO; i++) all_beta[i]=false;
for(IndexType i=NMO; i<2*NMO; i++) all_beta[i]=true;
SpH2_full_forME.setDims(2*NMO*NMO,2*NMO*NMO);
SpH2_full_forME.setup(head_of_nodes,name+std::string("SpH2_full_forME"),TG.getNodeCommLocal());
if(!createHamiltonianForPureDeterminant(all_alpha,all_beta,H1_full_forME,SpH2_full_forME,1e-5)) {
app_error()<<"Error in createHamiltonianForPureDeterminant during call to generateFullHamiltonianForME(). \n";
return false;
}
has_full_hamiltonian_for_matrix_elements = true;
return true;
}
// Gives access to the full Hamiltonian for matrix elements, generating it
// on first use.
//   h - set to the address of the one-body terms (H1_full_forME)
//   v - set to the address of the two-body sparse matrix (SpH2_full_forME)
// Returns false when the Hamiltonian had to be generated and the generation
// failed; the original ignored that result and always returned true. Both
// pointers are still assigned in that case, as before, so callers that do
// not inspect the return value behave as they did.
inline bool getFullHam(std::vector<s1D<ValueType> > *& h, ComplexSMSpMat *& v) {
  bool ok = true;
  if(!has_full_hamiltonian_for_matrix_elements)
    ok = generateFullHamiltonianForME();
  v = &SpH2_full_forME;
  h = &H1_full_forME;
  return ok;
}
// should only be used with CIPSI like methods
// ValueType H(IndexType I, IndexType J) {
// }
// One-body matrix element H(I,J), looked up in the sorted list H1.
// Elements that couple different spin blocks are zero. Only pairs with
// first index <= second index are searched for; the other triangle is
// obtained by conjugation. With spinRestricted, beta indices are folded
// onto the alpha range first. Returns zero when the element is not stored.
// this should never be used outside initialization routines.
ValueType H(IndexType I, IndexType J) {
  // exactly one of the two indices in the beta block -> no coupling
  if( (I>=NMO) != (J>=NMO) ) return ValueType(0);

  if(spinRestricted) {
    if(I>=NMO) I -= NMO;
    if(J>=NMO) J -= NMO;
  }

  // search for the ordered pair and remember whether it was swapped
  const bool swapped = (I > J);
  if(swapped) std::swap(I,J);

  s2Dit pos = std::lower_bound( H1.begin(), H1.end(), std::forward_as_tuple(I,J,static_cast<ValueType>(0.0)),mySort);
  const bool found = (pos != H1.end()) && (std::get<0>(*pos) == I) && (std::get<1>(*pos) == J);
  if(!found)
    return static_cast<ValueType>(0.0);

  if(swapped)
    return myconj(std::get<2>(*pos));
  return std::get<2>(*pos);
}
// Two-body matrix element H(I,J,K,L).
// Zero unless I,K lie in the same spin block and J,L lie in the same spin
// block. With spinRestricted, all indices are folded onto the alpha range.
// For a factorized Hamiltonian the value is the sparse dot product of rows
// ik and lj of V2_fact, truncated to zero at cutoff2bar; otherwise the
// element is looked up in the sorted list V2 through its smallest
// symmetry-equivalent index permutation.
// this should never be used outside initialization routines.
ValueType H(IndexType I, IndexType J, IndexType K, IndexType L)
{
  if( (I>=NMO) != (K>=NMO) ) return ValueType(0);
  if( (J>=NMO) != (L>=NMO) ) return ValueType(0);

  // identical folding was applied in both branches, so it is done once here
  if(spinRestricted) {
    if(I>=NMO) I -= NMO;
    if(J>=NMO) J -= NMO;
    if(K>=NMO) K -= NMO;
    if(L>=NMO) L -= NMO;
  }

  if(!factorizedHamiltonian) {
    s4D<ValueType> key = find_smaller_equivalent_OneBar_for_integral_list(std::forward_as_tuple(I,J,K,L,static_cast<ValueType>(0.0)));
    s4Dit pos = std::lower_bound( V2.begin(), V2.end(), key, mySort);
    const bool found = (pos != V2.end())
                    && std::get<0>(*pos)==std::get<0>(key)
                    && std::get<1>(*pos)==std::get<1>(key)
                    && std::get<2>(*pos)==std::get<2>(key)
                    && std::get<3>(*pos)==std::get<3>(key);
    if(!found)
      return static_cast<ValueType>(0.0);
    if(!isComplex( std::get<4>(key) ))
      return std::get<4>(*pos);
    // call it again to get correct value (either V or conj(V))
    key = find_smaller_equivalent_OneBar_for_integral_list(std::forward_as_tuple(I,J,K,L,std::get<4>(*pos)));
    return std::get<4>(key);
  }

  if(!V2_fact.isCompressed()) {
    app_error()<<" Error: Using uncompressed V2_fact in: SparseGeneralHamiltonian::H(I,J,K,L). " <<std::endl;
    APP_ABORT(" Error: Using uncompressed V2_fact in: SparseGeneralHamiltonian::H(I,J,K,L).");
  }

  int* cols = V2_fact.column_data();
  int* rows = V2_fact.row_data();  // fetched as in the original; not used below
  int* indx = V2_fact.row_index();
  ValueType* vals = V2_fact.values();

  const int ik = I*NMO+Index2Col(K);
  const int lj = L*NMO+Index2Col(J);

  ValueType val;
  val = SparseMatrixOperators::product_SpVSpV<ValueType>(indx[ik+1]-indx[ik],cols+indx[ik],vals+indx[ik],indx[lj+1]-indx[lj],cols+indx[lj],vals+indx[lj]);
  return (std::abs(val)>cutoff2bar)?(val):(0);
}
// Two-body matrix element H(I,J,K,L) looked up in a caller-supplied sorted
// list V, with NT orbitals per spin block instead of NMO.
// Zero unless I,K lie in the same spin block and J,L lie in the same spin
// block. With spinRestricted, all indices are folded onto [0,NT).
// Not implemented for factorized Hamiltonians: that case aborts. The
// original carried a copy of the factorized lookup after the abort, which
// could never execute; it has been removed.
ValueType H(IndexType I, IndexType J, IndexType K, IndexType L, std::vector<s4D<ValueType> >& V, int NT )
{
  if( (I>=NT && K<NT) || (I<NT && K>=NT) ) return ValueType(0);
  if( (J>=NT && L<NT) || (J<NT && L>=NT) ) return ValueType(0);

  if(factorizedHamiltonian) {
    APP_ABORT(" Error: ValueType H(I,J,K,L,V,NT): not implemented with factorized hamiltonian. \n");
    // not expected to be reached; keeps every path returning a value
    return ValueType(0);
  }

  if(spinRestricted) {
    I = (I>=NT)?(I-NT):(I);
    J = (J>=NT)?(J-NT):(J);
    K = (K>=NT)?(K-NT):(K);
    L = (L>=NT)?(L-NT):(L);
  }

  // search for the smallest symmetry-equivalent index permutation
  s4D<ValueType> s = find_smaller_equivalent_OneBar_for_integral_list(std::forward_as_tuple(I,J,K,L,static_cast<ValueType>(0.0)));
  std::vector<s4D<ValueType> >::iterator it = std::lower_bound( V.begin(), V.end(), s, mySort);
  if (it != V.end() && std::get<0>(*it)==std::get<0>(s) && std::get<1>(*it)==std::get<1>(s) && std::get<2>(*it)==std::get<2>(s) && std::get<3>(*it)==std::get<3>(s) ) {
    if(isComplex( std::get<4>(s) ) ) {
      // call it again to get correct value (either V or conj(V))
      s = find_smaller_equivalent_OneBar_for_integral_list(std::forward_as_tuple(I,J,K,L,std::get<4>(*it)));
      return std::get<4>(s);
    } else {
      return std::get<4>(*it);
    }
  } else {
    return static_cast<ValueType>(0.0);
  }
}
// Two-body element minus its exchange partner: H(I,J,K,L) - H(I,J,L,K).
// Zero when I,K or J,L lie in different spin blocks, and when I==J or K==L.
ValueType H_2bar(IndexType I, IndexType J, IndexType K, IndexType L)
{
  const bool spinMismatchIK = (I>=NMO) != (K>=NMO);
  const bool spinMismatchJL = (J>=NMO) != (L>=NMO);
  if(spinMismatchIK) return ValueType(0);
  if(spinMismatchJL) return ValueType(0);
  if(I==J) return ValueType(0);
  if(K==L) return ValueType(0);
  const ValueType direct = H(I,J,K,L);
  const ValueType exchange = H(I,J,L,K);
  return direct - exchange;
}
bool initializeCCProjector(ComplexMatrix& Pmat, RealType cut=1e-6);
void generate_selCI_Ham(double cutoff);
void get_selCI_excitations(OrbitalType I, OrbitalType J, int spinSector, RealType cutoff, OrbitalType* occs, std::vector<OrbitalType>& KLs);
protected:
// name of restart file
std::string hdf_write_file;
std::string ascii_write_file;
// maximum number of MO in integral file
int NMAX;
bool test_2eint;
bool zero_bad_diag_2eints;
bool test_algo;
// stores one body integrals in s2D format
std::vector<s2D<ValueType> > H1;
// shared memory vectors
SMDenseVector<s4D<ValueType> > V2;
SMDenseVector<s4D<ValueType> > V2_2bar;
bool v2full_transposed;
std::vector<long> KL;
std::vector<long> IJ;
SMDenseVector<s4D<ValueType> > V2_full;
std::string rotation;;
ValueMatrix rotationMatrix;
//
bool factorizedHamiltonian;
// factorized Ham : V2(ik,lj) = sum_n V2_fact(ik,n)*conj(V2_fact(lj,n))
// similar to Spvn, without -dt/2 factor and without L+L* / L-L* rotation
// NOTE: Make this identical to Spvn and return pointer to this object to propagator
// this avoids having 2 copied when reading in 3Index form
ValueSMSpMat V2_fact;
std::vector<int> cholesky_residuals;
bool has_hamiltonian_for_selCI;
// This is going to be a problem with enough orbitals, e.g. NMO ~> 1000
int nmax_KL_selCI;
IndexVector IJ_aa, IJ_bb, IJ_ab;
SMDenseVector<s2D<ValueType> > V2_selCI_aa;
SMDenseVector<s2D<ValueType> > V2_selCI_ab;
SMDenseVector<s2D<ValueType> > V2_selCI_bb;
bool has_full_hamiltonian_for_matrix_elements;
ComplexSMSpMat SpH2_full_forME;
std::vector<s1D<ValueType> > H1_full_forME;
// do we need to sort the states according to eigenvalue?
bool orderStates;
bool printEig;
bool close_shell;
// cutoff to read 1bar terms in hamiltonian matrices
double cutoff1bar;
// cutoff to create 2bar terms in hamiltonian matrices
double cutoff2bar;
// cutoff for cholesky
double cutoff_cholesky;
// needed only if we are ordering states (Molpro issue)
// occup might be different from wfn, so do not mix
std::vector<IndexType> occup_alpha;
std::vector<IndexType> virtual_alpha;
std::map<IndexType,bool> isOcc_alpha;
std::vector<IndexType> occup_beta;
std::vector<IndexType> virtual_beta;
std::map<IndexType,bool> isOcc_beta;
// right now assuming core states are the bottom NCX states in the list, (possibly after reordering).
// Generalize this to partition states into (core,occ,virtual) based on arbitrary lists.
std::vector<IndexType> core_alpha;
std::map<IndexType,bool> isCore_alpha;
std::vector<IndexType> core_beta;
std::map<IndexType,bool> isCore_beta;
std::vector<ValueType> eig;
std::vector<IndexType> orbSymm;
std::vector<IndexType> occupPerSymm_alpha;
std::vector<IndexType> occupPerSymm_beta;
// ASCII initialization. FCIDUMP is the only supported format: the file is
// read with readFCIDUMP (minimizeIO = false) when filetype is "fcidump";
// any other filetype yields false without touching the file.
bool initFromASCII(const std::string& fileName)
{
  // allow for different formats later on.
  const bool isFcidump = (filetype == "fcidump");
  if(!isFcidump) return false;
  return readFCIDUMP(fileName, false);
}
// XML initialization is not implemented. The original body was empty, i.e.
// a bool function flowing off its end (undefined behavior); it now reports
// failure so that HamiltonianBase::init() returns false for filetype "xml".
bool initFromXML(const std::string& fileName) { return false; }
bool initFromHDF5(const std::string& fileName);
void hdf_write();
void ascii_write();
bool generate_V2_2bar();
// read integrals from ascii file, assuming FCIDUMP format
// returns them in sparse form, using extended indexing to
// encode alpha/beta, so index goes from 0-2*NMO-1
// initFromFCIDUMP takes care of storing them in the correct format of the
// derived class
bool readFCIDUMP(const std::string& fileName, bool minimizeIO);//,
// std::vector<s2D<ValueType> >& , std::vector<s4D<ValueType> >& );
// count number of elements in file
bool countElementsFromFCIDUMP(std::ifstream&,int&,int&,int&,int&,int&,int&,int&,int&,std::map<IndexType,IndexType>&, std::map<IndexType,IndexType>&,int& n);
// read elements in FCIDUMP
bool readElementsFromFCIDUMP(std::ifstream&,std::vector<s2D<ValueType> >&, std::vector<s2D<ValueType> >&, SMDenseVector<s4D<ValueType> >&, std::vector<s4D<ValueType> >&, std::vector<s4D<ValueType> >&, ValueSMSpMat&, ValueSMSpMat&, ValueSMSpMat&, std::map<IndexType,IndexType>&, std::map<IndexType,IndexType>&);
// find all permutation of indexes among symmetry equivalent terms
// NOT TO BE USED OUTSIDE INITIALIZATION! SLOW!
void find_equivalent_OneBar_for_integral_list(s4D<ValueType> ijkl, std::vector<s4D<ValueType> >& v);
// find all permutation of indexes among symmetry equivalent terms
// NOT TO BE USED OUTSIDE INITIALIZATION! SLOW!
void find_equivalent_TwoBar_for_integral_list(s4D<ValueType> ijkl, std::vector<s4D<ValueType> >& v);
// find all permutation of indexes among symmetry equivalent terms
// eliminates redundant terms and adjusts weight
// redundant terms are those that are repeated when contracting against G*G
// example: ik/jl and jl/ik. Instead of keeping 2, keep one and multiply V by 2.
// NOT TO BE USED OUTSIDE INITIALIZATION! SLOW!
void find_equivalent_OneBar_for_hamiltonian_generation(s4D<ValueType> ijkl, std::vector<s4D<ValueType> >& v);
// find smaller permutation of indexes among symmetry equivalent terms
// This is the one stored in V2.
// NOT TO BE USED OUTSIDE INITIALIZATION! SLOW!
void find_equivalent_TwoBar_for_hamiltonian_generation(s4D<ValueType> ijkl, std::vector<s4D<ValueType> >& v);
// find smaller permutation of indexes among symmetry equivalent terms
// This is the one stored in V2.
// NOT TO BE USED OUTSIDE INITIALIZATION! SLOW!
s4D<ValueType> find_smaller_equivalent_OneBar_for_integral_list(s4D<ValueType> ijkl);
// find smaller permutation of indexes among symmetry equivalent terms
// This is the one stored in V2_2bar.
// NOT TO BE USED OUTSIDE INITIALIZATION! SLOW!
s4D<ValueType> find_smaller_equivalent_TwoBar_for_integral_list(s4D<ValueType> ijkl);
//bool getFCIDUMPline(std::ifstream& in, ValueType& val, IndexType& ap, IndexType& bp, IndexType& cp, IndexType& dp);
// Classify a four-index term by the spin blocks of its first two indices.
// An index below NMO counts as alpha, anything else as beta; k and l are not inspected.
// Returns 0 <alpha,alpha|..>, 1 <alpha,beta|..>, 2 <beta,alpha|..>, 3 <beta,beta|..>.
inline int getSpinSector(const IndexType& i, const IndexType& j, const IndexType& k, const IndexType& l) {
  const bool first_is_alpha = (i < NMO);
  const bool second_is_alpha = (j < NMO);
  if(first_is_alpha)
    return second_is_alpha ? 0 : 1;
  return second_is_alpha ? 2 : 3;
}
// Check that <i,j|k,l> is spin-consistent for a threshold NT: an index below NT is alpha,
// otherwise beta. The term is accepted when k lies in the same spin block as i and
// l lies in the same spin block as j.
inline bool goodSpinSector(const IndexType& i, const IndexType& j, const IndexType& k, const IndexType& l, int NT) {
  const bool i_alpha = (i < NT);
  const bool j_alpha = (j < NT);
  const bool k_alpha = (k < NT);
  const bool l_alpha = (l < NT);
  return (i_alpha == k_alpha) && (j_alpha == l_alpha);
}
// Two-index variant: returns 0 when i is below NMO (alpha) and 1 otherwise (beta).
// Only the first index is examined; j is not used.
inline int getSpinSector(const IndexType& i, const IndexType& j) {
  return (i < NMO) ? 0 : 1;
}
// Fold an extended index back into the range of a single spin block:
// values below NMO are returned unchanged, all others are shifted down by NMO.
inline IndexType Index2Col(IndexType i) {
  if(i < NMO)
    return i;
  return i-NMO;
}
// Linearize an index pair (i,j) into a single position.
// For i < NMO the result is i*NMO+j; otherwise both indices are shifted down by NMO
// and the result is offset by NMO*NMO, so the second block follows the first.
// NOTE(review): the formula presumably expects i and j to belong to the same spin block
// (both below NMO or both at/above it) - this is not checked here.
inline IndexType Index2Mat(IndexType i, IndexType j) {
  if(i < NMO)
    return i*NMO+j;
  return NMO*NMO+(i-NMO)*NMO+j-NMO;
}
// used to sort snD values using only indexes
_mySort_snD_ mySort;
// used to identify equal index sets (value is not compared)
_myEqv_snD_ myEqv;
void find_all_contributions_to_hamiltonian_closed_shell(OrbitalType i, OrbitalType j, OrbitalType k, OrbitalType l, ValueType J1, ValueType J2, ValueType J3, double cut, std::vector<s4D<ValueType> >& v);
void find_all_contributions_to_hamiltonian_spinRestricted(OrbitalType i, OrbitalType j, OrbitalType k, OrbitalType l, ValueType J1, ValueType J2, ValueType J3, double cut, std::vector<s4D<ValueType> >& v);
void find_all_contributions_to_hamiltonian_general(OrbitalType i, OrbitalType j, OrbitalType k, OrbitalType l, ValueType J1, ValueType J2, ValueType J3, double cut, std::vector<s4D<ValueType> >& v);
// Count the entries of vs4D whose first and second indices are both flagged in occ_a or occ_b.
// Lookups use std::map::operator[], so keys that are missing get inserted with value false;
// the short-circuit order of the lookups matches the original expression.
int count_allowed_terms(std::vector<s4D<ValueType> >& vs4D, std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b)
{
  int n_allowed=0;
  for(auto& term : vs4D) {
    if( !(occ_a[std::get<0>(term)]||occ_b[std::get<0>(term)]) ) continue;
    if( !(occ_a[std::get<1>(term)]||occ_b[std::get<1>(term)]) ) continue;
    n_allowed++;
  }
  return n_allowed;
}
// Append the term (i,j,k,l) with value V to v in a canonical ordering.
// The pairs (i,k) and (j,l) are linearized with Index2Mat:
//   - equal positions: the term is stored once with value V;
//   - otherwise the term is stored with value 2*V, ordered so that the (i,k) position
//     is the smaller one (the index pairs are swapped when it is not).
inline void push_ijkl(OrbitalType i, OrbitalType j, OrbitalType k, OrbitalType l, ValueType V, std::vector<s4D<ValueType> >& v) {
  const long pos_ik = Index2Mat(i,k);
  const long pos_jl = Index2Mat(j,l);
  if( pos_ik == pos_jl ) {
    v.push_back(std::make_tuple(i,j,k,l,V));
    return;
  }
  if( pos_ik < pos_jl )
    v.push_back(std::make_tuple(i,j,k,l,2*V));
  else
    v.push_back(std::make_tuple(j,i,l,k,2*V));
}
// For every entry of vs4D whose first and second indices are flagged in occ_a or occ_b,
// append (Index2Mat(i,k), Index2Mat(j,l), value) to V.
// Map lookups use operator[], which inserts missing keys with value false.
void add_allowed_terms(std::vector<s4D<ValueType> >& vs4D, std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b, std::vector<s2D<ValueType> >& V )
{
  for(auto& term : vs4D) {
    if( !(occ_a[std::get<0>(term)]||occ_b[std::get<0>(term)]) ) continue;
    if( !(occ_a[std::get<1>(term)]||occ_b[std::get<1>(term)]) ) continue;
    IndexType row = Index2Mat(std::get<0>(term),std::get<2>(term));
    IndexType col = Index2Mat(std::get<1>(term),std::get<3>(term));
    V.push_back( std::forward_as_tuple(row , col , std::get<4>(term)) );
  }
}
// Sparse-matrix variant: for every entry of vs4D whose first and second indices are flagged
// in occ_a or occ_b, add the value (converted with toComplex) to V at
// (Index2Mat(i,k), Index2Mat(j,l)).
// Map lookups use operator[], which inserts missing keys with value false.
void add_allowed_terms(std::vector<s4D<ValueType> >& vs4D, std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b, ComplexSpMat& V )
{
  for(auto& term : vs4D) {
    if( !(occ_a[std::get<0>(term)]||occ_b[std::get<0>(term)]) ) continue;
    if( !(occ_a[std::get<1>(term)]||occ_b[std::get<1>(term)]) ) continue;
    V.add( Index2Mat(std::get<0>(term),std::get<2>(term)) ,
           Index2Mat(std::get<1>(term),std::get<3>(term)) ,
           toComplex(std::get<4>(term)) );
  }
}
// Shared-memory sparse-matrix variant: adds every allowed entry of vs4D to V at
// (Index2Mat(i,k), Index2Mat(j,l)), forwarding needs_locks to V.add, and returns how many
// entries were added. An entry is allowed when its first and second indices are each
// flagged in occ_a or occ_b (lookups use operator[], which inserts missing keys as false).
int add_allowed_terms(std::vector<s4D<ValueType> >& vs4D, std::map<IndexType,bool>& occ_a, std::map<IndexType,bool>& occ_b, ComplexSMSpMat& V , bool needs_locks=false)
{
  int n_added=0;
  for(auto& term : vs4D) {
    if( !(occ_a[std::get<0>(term)]||occ_b[std::get<0>(term)]) ) continue;
    if( !(occ_a[std::get<1>(term)]||occ_b[std::get<1>(term)]) ) continue;
    n_added++;
    V.add( Index2Mat(std::get<0>(term),std::get<2>(term)) ,
           Index2Mat(std::get<1>(term),std::get<3>(term)) ,
           toComplex(std::get<4>(term)), needs_locks);
  }
  return n_added;
}
// Debug helper: print the two fields of a one-index tuple to std::cout on one line.
void print_tuple(s1D<ValueType>& t) {
  std::cout<<" - " <<std::get<0>(t);
  std::cout<<" " <<std::get<1>(t) <<std::endl;
}
// Debug helper: print a four-index tuple to std::cout as "value i j k l".
void print_tuple(s4D<ValueType>& v) {
  std::cout<<std::get<4>(v) <<" ";
  std::cout<<std::get<0>(v) <<" " <<std::get<1>(v) <<" ";
  std::cout<<std::get<2>(v) <<" " <<std::get<3>(v) <<std::endl;
}
// Debug helper: print every four-index tuple of v as "value i j k l", one per line,
// followed by a blank line.
void print_Vs4D(std::vector<s4D<ValueType> >& v) {
  for(auto& term : v) {
    std::cout<<std::get<4>(term) <<" ";
    std::cout<<std::get<0>(term) <<" " <<std::get<1>(term) <<" ";
    std::cout<<std::get<2>(term) <<" " <<std::get<3>(term) <<std::endl;
  }
  std::cout<<std::endl;
}
// Position of element (i,j) in a row-major packed upper triangle (diagonal included)
// of an N x N symmetric matrix. The pair is symmetrized first, so (i,j) and (j,i) agree.
inline long mapUT(long i, long j, long N) {
  const long row = (j >= i) ? i : j;   // smaller index selects the row
  const long col = (j >= i) ? j : i;   // larger index selects the column
  return N*row + col - (row*(row+1))/2;
}
// Position of the off-diagonal element (i,j) in a row-major packed strict upper triangle
// (diagonal excluded) of an N x N symmetric matrix; (i,j) and (j,i) map to the same slot.
// Calling it with i == j is an error and triggers APP_ABORT.
inline long mapUT_woD(long i, long j, long N) {
  if(j > i)
    return N*i + j - (i*(i+1))/2 - i-1;
  if(j < i)
    return N*j + i - (j*(j+1))/2 - j-1;
  {
    APP_ABORT(" Error in mapUT_woD: This should not happen. \n");
  }
}
// int overload: position of element (i,j) in a row-major packed upper triangle
// (diagonal included) of an N x N symmetric matrix; (i,j) and (j,i) agree.
inline int mapUT(int i, int j, int N) {
  const int row = (j >= i) ? i : j;   // smaller index selects the row
  const int col = (j >= i) ? j : i;   // larger index selects the column
  return N*row + col - (row*(row+1))/2;
}
// int overload: position of the off-diagonal element (i,j) in a row-major packed strict
// upper triangle (diagonal excluded) of an N x N symmetric matrix.
// Calling it with i == j is an error and triggers APP_ABORT.
inline int mapUT_woD(int i, int j, int N) {
  if(j > i)
    return N*i + j - (i*(i+1))/2 - i-1;
  if(j < i)
    return N*j + i - (j*(j+1))/2 - j-1;
  {
    APP_ABORT(" Error in mapUT_woD: This should not happen. \n");
  }
}
bool communicate_Vijkl(ComplexSMSpMat&);
};
}
#endif

2
src/AFQMC/Matrix/NOTE.txt Executable file
View File

@ -0,0 +1,2 @@
Do not wrap OhmmsPETE/Matrix;
there seems to be no reason to use other libraries.

495
src/AFQMC/Matrix/SMDenseVector.h Executable file
View File

@ -0,0 +1,495 @@
#ifndef QMCPLUSPLUS_AFQMC_SMDENSEVECTOR_H
#define QMCPLUSPLUS_AFQMC_SMDENSEVECTOR_H
#include<iostream>
#include<vector>
#include<tuple>
#include <cassert>
#include<algorithm>
#include<complex>
#include"AFQMC/config.0.h"
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/vector.hpp>
#define ASSERT_VECTOR
namespace qmcplusplus
{
// wrapper for boost::interprocess::vector
template<class T>
class SMDenseVector
{
public:
template<typename spT> using ShmemAllocator = boost::interprocess::allocator<spT, boost::interprocess::managed_shared_memory::segment_manager>;
template<typename spT> using boost_SMVector = boost::interprocess::vector<spT, ShmemAllocator<spT>>;
typedef T Type_t;
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef const int* const_indxPtr;
typedef int* indxPtr;
typedef typename boost_SMVector<T>::iterator iterator;
typedef typename boost_SMVector<T>::const_iterator const_iterator;
typedef typename boost_SMVector<int>::iterator int_iterator;
typedef typename boost_SMVector<int>::const_iterator const_int_iterator;
typedef boost_SMVector<T> This_t;
SMDenseVector<T>():head(false),ID(""),SMallocated(false),vals(NULL),share_buff(NULL),mutex(NULL),
segment(NULL),alloc_T(NULL),alloc_mutex(NULL),alloc_uchar(NULL)
{
remover.ID="NULL";
remover.head=false;
}
~SMDenseVector<T>()
{
if(segment!=NULL) {
delete segment;
boost::interprocess::shared_memory_object::remove(ID.c_str());
}
}
// this should probably be disallowed
// Copy constructor: copying is intentionally unsupported and always calls APP_ABORT.
// No member is initialized from rhs (the commented-out lines show the abandoned attempt).
SMDenseVector(const SMDenseVector<T> &rhs)
{
// ID = rhs.ID; // is this a good idea???
// head = rhs.head;
APP_ABORT(" Error: SMDenseVector(SMDenseVector rhs) copy constructor has been disabled.");
}
// Record the identity of this object before any allocation:
//   hd    - whether this process is the head (the one that creates/modifies the segment)
//   ii    - name of the shared-memory segment
//   comm_ - communicator used by barrier()
// The same head flag and name are mirrored into the embedded remover.
inline void setup(bool hd, std::string ii, MPI_Comm comm_) {
  comm=comm_;
  head=hd;
  remover.head=hd;
  ID=ii;
  remover.ID=ii;
}
// Ensure capacity for nnz elements. allocate() is called when there is no vector yet,
// when the current capacity is too small, or when it is larger than nnz and allow_reduce
// is set. The head process then reserves nnz on the vector; every process ends at a barrier.
inline void reserve(int nnz, bool allow_reduce = false)
{
  bool must_allocate = (vals==NULL);
  if(!must_allocate) {
    if(vals->capacity() < nnz)
      must_allocate = true;
    else if(vals->capacity() > nnz && allow_reduce)
      must_allocate = true;
  }
  if(must_allocate)
    allocate(nnz,allow_reduce);
  if(head) vals->reserve(nnz);
  barrier();
}
// Broadcast n objects of type T1 through the shared scratch buffer.
// The sender copies x into the buffer, all processes synchronize, and every other
// process copies the buffer into its own x. A final barrier keeps the buffer from being
// reused before everyone has read it. Aborts if the object has not been allocated.
template<typename T1>
void share(T1* x, int n, bool sender) {
  if(!SMallocated)
    APP_ABORT("Error: Call to SMDenseVector::share with unallocated object. \n");
  assert( sizeof(T1)*n < sizeof(unsigned char)*share_buff->size() );
  if(sender)
    std::memcpy(&((*share_buff)[0]),x,sizeof(T1)*n);
  barrier();
  if(!sender)
    std::memcpy(x,&((*share_buff)[0]),sizeof(T1)*n);
  barrier();
}
// std::vector overload of share(): broadcasts the first n elements of x through the
// shared scratch buffer. The sender writes, everyone synchronizes, the others read,
// and a final barrier protects the buffer. x must already hold at least n elements.
template<typename T1>
void share(std::vector<T1>& x, int n, bool sender) {
  if(!SMallocated)
    APP_ABORT("Error: Call to SMDenseVector::share with unallocated object. \n");
  assert( sizeof(T1)*n < sizeof(unsigned char)*share_buff->size() );
  assert( x.size() >= n);
  if(sender)
    std::memcpy(&((*share_buff)[0]),x.data(),sizeof(T1)*n);
  barrier();
  if(!sender)
    std::memcpy(x.data(),&((*share_buff)[0]),sizeof(T1)*n);
  barrier();
}
inline void barrier() {
MPI_Barrier(comm);
}
inline bool deallocate()
{
SMallocated = false;
barrier();
if(!head) {
try{
delete segment;
segment=NULL;
} catch(std::bad_alloc&) {
std::cerr<<"Problems deleting segment in SMDenseVector::deallocate()." <<std::endl;
return false;
}
}
barrier();
if(head) {
try{
delete segment;
segment=NULL;
boost::interprocess::shared_memory_object::remove(ID.c_str());
} catch(std::bad_alloc&) {
std::cerr<<"Problems de-allocating shared memory in SMDenseVector." <<std::endl;
return false;
}
}
barrier();
}
// this routine does not allow grow/shrink, meant in cases where only head can call it
inline bool allocate_serial(int n)
{
if(!head) return false; /* XA: This was returning nothing, I assume false is the right thing to return here */
if(vals!=NULL && vals->capacity() >= n) return true;
memory = sizeof(boost::interprocess::interprocess_mutex)+n*sizeof(T)+1000*sizeof(unsigned char)+8000;
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
} catch(boost::interprocess::interprocess_exception &ex) {
std::cout<<" Found managed_shared_memory segment, removing. Careful with persistent SHM segment. \n";
boost::interprocess::shared_memory_object::remove(ID.c_str());
segment=NULL;
}
if(segment==NULL) {
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
} catch(boost::interprocess::interprocess_exception &ex) {
std::cerr<<"Problems setting up managed_shared_memory in SMDenseVector." <<std::endl;
return false;
}
}
try {
alloc_T = new ShmemAllocator<T>(segment->get_segment_manager());
alloc_uchar = new ShmemAllocator<unsigned char>(segment->get_segment_manager());
share_buff = segment->construct<boost_SMVector<unsigned char>>("share_buff")(*alloc_uchar);
share_buff->resize(1000);
mutex = segment->construct<boost::interprocess::interprocess_mutex>("mutex")();
vals = segment->construct<boost_SMVector<T>>("vals")(*alloc_T);
vals->reserve(n);
} catch(std::bad_alloc&) {
std::cerr<<"Problems allocating shared memory in SMDenseVector." <<std::endl;
return false;
}
SMallocated=true;
return true;
}
// Collective (re)allocation of the shared-memory segment for n elements.
// Every process must call this; the head creates or resizes the segment between two
// barriers and the other processes (re)attach afterwards through initializeChildren().
// Returns true on success (or when the existing capacity already suffices), false when
// the head fails to create, grow, shrink or reopen the segment.
// NOTE(review): the early "return false" paths on the head skip the second barrier, so the
// other processes would presumably wait there - confirm how callers handle failure.
// NOTE(review): the handlers for std::bad_alloc will not see exceptions derived from
// boost::interprocess::interprocess_exception - verify which type construct()/open throw.
inline bool allocate(int n, bool allow_reduce=false)
{
bool grow = false;
// size of the currently mapped segment, 0 when nothing is mapped
uint64_t old_sz = (segment==NULL)?0:(segment->get_size());
if(SMallocated) {
// nothing to do when capacity is sufficient and shrinking was not requested
if(vals!=NULL && vals->capacity() >= n && !allow_reduce) return true;
grow = true;
if(!head) { // delay delete call on head in case you need to shrink vector
delete segment;
segment=NULL;
}
}
barrier();
if(head) {
// mutex + payload + 1000-byte scratch buffer + fixed slack
memory = sizeof(boost::interprocess::interprocess_mutex)+n*sizeof(T)+1000*sizeof(unsigned char)+8000;
if(grow) {
if(memory > old_sz) {
// existing segment is too small: unmap it and enlarge it by the missing amount
uint64_t extra = memory - old_sz;
delete segment;
segment=NULL;
if(!boost::interprocess::managed_shared_memory::grow(ID.c_str(), extra)) {
std::cerr<<" Error growing shared memory in SMDenseVector::allocate(). \n";
return false;
}
} else {
// existing segment is large enough: rebuild an empty "vals" with the requested capacity
// (previous contents are destroyed), then unmap and shrink the segment
segment->destroy<boost_SMVector<T>>("vals");
vals = segment->construct<boost_SMVector<T>>("vals")(*alloc_T);
vals->reserve(n);
delete segment;
segment=NULL;
if(!boost::interprocess::managed_shared_memory::shrink_to_fit(ID.c_str())) {
std::cerr<<" Error in shrink_to_fit shared memory in SMDenseVector::allocate(). \n";
return false;
}
}
// remap the resized segment and look the named objects up again
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::open_only, ID.c_str());
vals = segment->find<boost_SMVector<T>>("vals").first;
share_buff = segment->find<boost_SMVector<unsigned char>>("share_buff").first;
mutex = segment->find<boost::interprocess::interprocess_mutex>("mutex").first;
assert(vals != 0);
assert(share_buff != 0);
assert(mutex != 0);
vals->reserve(n);
} catch(std::bad_alloc&) {
std::cerr<<"Problems opening shared memory in SMDenseVector::allocate() ." <<std::endl;
return false;
}
} else {
// first allocation: create the segment; if the name is already taken, remove it and retry once
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
} catch(boost::interprocess::interprocess_exception &ex) {
std::cout<<" Found managed_shared_memory segment, removing. Careful with persistent SHM segment. \n";
boost::interprocess::shared_memory_object::remove(ID.c_str());
segment=NULL;
}
if(segment==NULL) {
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
} catch(boost::interprocess::interprocess_exception &ex) {
std::cerr<<"Problems setting up managed_shared_memory in SMDenseVector." <<std::endl;
return false;
}
}
// construct the named objects: scratch buffer (1000 bytes), mutex and the value vector
try {
alloc_T = new ShmemAllocator<T>(segment->get_segment_manager());
alloc_uchar = new ShmemAllocator<unsigned char>(segment->get_segment_manager());
share_buff = segment->construct<boost_SMVector<unsigned char>>("share_buff")(*alloc_uchar);
share_buff->resize(1000);
mutex = segment->construct<boost::interprocess::interprocess_mutex>("mutex")();
vals = segment->construct<boost_SMVector<T>>("vals")(*alloc_T);
vals->reserve(n);
} catch(std::bad_alloc&) {
std::cerr<<"Problems allocating shared memory in SMDenseVector." <<std::endl;
return false;
}
}
}
barrier();
SMallocated=true;
// non-head processes attach to the segment here; the return value is not checked
initializeChildren();
return true;
}
// only call this when all arrays have been allocated and modified
// Attach a non-head process to the segment created by the head and look up the named
// objects ("vals", "share_buff", "mutex"). The head returns immediately.
// Any previous local mapping is dropped first, so the routine can be called repeatedly;
// this only unmaps the segment in this process, the shared memory itself is not removed.
// Returns false if std::bad_alloc is raised while attaching, true otherwise.
inline bool initializeChildren()
{
  if(head) return true;
  // deleting a null pointer is a no-op, so no explicit check is needed
  delete segment;
  segment=NULL;
  try {
    segment = new boost::interprocess::managed_shared_memory(boost::interprocess::open_only, ID.c_str());
    vals = segment->find<boost_SMVector<T>>("vals").first;
    assert(vals != 0);
    share_buff = segment->find<boost_SMVector<unsigned char>>("share_buff").first;
    assert(share_buff != 0);
    mutex = segment->find<boost::interprocess::interprocess_mutex>("mutex").first;
    assert(mutex != 0);
  } catch(std::bad_alloc&) {
    std::cerr<<"Problems allocating shared memory in SMDenseVector: initializeChildren() ." <<std::endl;
    return false;
  }
  return true;
}
// resize is probably the best way to setup the vector
inline void resize(int nnz, bool allow_reduce=false)
{
if(vals==NULL || (vals!=NULL && vals->capacity() < nnz) ) {
allocate(nnz,allow_reduce);
} else if(vals!=NULL && vals->capacity() > nnz && allow_reduce) {
std::vector<T> tmp;
if(head) {
tmp.resize(nnz);
std::copy(vals->begin(),vals->begin()+nnz,tmp.begin());
}
allocate(nnz,allow_reduce);
if(head) {
vals->resize(nnz);
std::copy(tmp.begin(),tmp.begin()+nnz,vals->begin());
}
}
if(head) vals->resize(nnz);
barrier();
}
// does not allow grow/shrink
// Head-only resize without any reallocation or barrier. Non-head processes return
// immediately. Aborts when the vector does not exist or its capacity is below nnz.
inline void resize_serial(int nnz)
{
  if(!head) return;
  const bool enough_room = (vals!=NULL) && !(vals->capacity() < nnz);
  if(!enough_room)
    APP_ABORT("Error: Calling SMDenseVector::resize_serial(n) without enough capacity. \n");
  vals->resize(nnz);
}
// Remove all elements. Only the head of an allocated object touches the vector;
// every other case is a no-op.
inline void clear() {
  if(head && SMallocated)
    vals->clear();
}
inline unsigned long size() const
{
return (vals!=NULL)?(vals->size()):0;
}
inline const_pointer values() const
{
return (vals!=NULL)?(&((*vals)[0])):NULL;
}
inline pointer values()
{
return (vals!=NULL)?(&((*vals)[0])):NULL;
}
inline bool isAllocated() {
return (SMallocated)&&(vals!=NULL);
}
// Copy assignment: intentionally unsupported, always calls APP_ABORT.
// NOTE(review): there is no return statement after APP_ABORT, and the declared return type
// This_t is the typedef of the underlying boost vector rather than SMDenseVector<T> -
// confirm whether that is intended before re-enabling the commented-out implementation.
inline This_t& operator=(const SMDenseVector<T> &rhs)
{
APP_ABORT(" Error: SMDenseVector(SMDenseVector rhs) operator= has been disabled.");
//resize(rhs.size());
//if(!head) return *this;
//(*vals)=*(rhs.vals);
//return *this;
}
// Element access by index, returning a mutable reference.
// Bounds are asserted when this header's own switch ASSERT_VECTOR is defined; the older
// ASSERT_SPARSEMATRIX switch (copied from the sparse matrix class) is still honoured.
// The index is unsigned, so only the upper bound needs checking.
inline Type_t& operator()(unsigned int i)
{
#if defined(ASSERT_VECTOR) || defined(ASSERT_SPARSEMATRIX)
  assert(vals!=NULL && i<vals->size());
#endif
  return (*vals)[i];
}
// Element access by index, returning a mutable reference.
// Bounds are asserted when this header's own switch ASSERT_VECTOR is defined; the older
// ASSERT_SPARSEMATRIX switch (copied from the sparse matrix class) is still honoured.
// The index is unsigned, so only the upper bound needs checking.
inline Type_t& operator[](unsigned int i)
{
#if defined(ASSERT_VECTOR) || defined(ASSERT_SPARSEMATRIX)
  assert(vals!=NULL && i<vals->size());
#endif
  return (*vals)[i];
}
// Store v at position i. Despite the name, the element is overwritten, not accumulated.
//   needs_locks == true : any process may write; the write is done under the shared mutex.
//   needs_locks == false: only the head writes; other processes return without effect.
// NOTE(review): the bounds assertion is guarded by ASSERT_SPARSEMATRIX, while this header
// defines ASSERT_VECTOR - confirm which switch is meant to enable it.
inline void add(const int i, const T& v, bool needs_locks=false)
{
#ifdef ASSERT_SPARSEMATRIX
assert(i>=0 && i<vals->size());
#endif
if(needs_locks) {
// lock is released when it goes out of scope at the end of this branch
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*mutex);
(*vals)[i]=v;
} else {
if(!head) return;
(*vals)[i]=v;
}
}
inline uint64_t memoryUsage() { return memory; }
inline int capacity() { return (vals==NULL)?0:vals->capacity(); }
// Append v to the vector. The vector must exist and must already have spare capacity
// (both are asserted); the append never triggers a reallocation of the segment here.
// With needs_locks set, the append is performed while holding the shared mutex.
inline void push_back(const T& v, bool needs_locks=false)
{
  assert(vals != NULL);
  assert(vals->capacity() >= vals->size()+1 );
  if(!needs_locks) {
    vals->push_back(v);
    return;
  }
  boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*mutex);
  vals->push_back(v);
}
// Scale every element by a real factor. Only the head process modifies the data;
// other processes return immediately. Returns *this in both cases.
inline SMDenseVector<T>& operator*=(const RealType rhs )
{
  if(head) {
    iterator cur=vals->begin();
    while(cur!=vals->end()) {
      (*cur) *= rhs;
      cur++;
    }
  }
  return *this;
}
// Scale every element by a complex factor. Only the head process modifies the data;
// other processes return immediately. Returns *this in both cases.
inline SMDenseVector<T>& operator*=(const std::complex<RealType> rhs )
{
  if(head) {
    iterator cur=vals->begin();
    while(cur!=vals->end()) {
      (*cur) *= rhs;
      cur++;
    }
  }
  return *this;
}
// Stream the vector as one "(index:value)" entry per line.
// Fix: the previous body was copied from the sparse matrix class and referenced the
// members myrows and colms, which do not exist in SMDenseVector, so it could not be
// instantiated. A dense vector has only a position and a value per entry.
// An unallocated vector prints nothing.
friend std::ostream& operator<<(std::ostream& out, const SMDenseVector<T>& rhs)
{
  if(rhs.vals==NULL) return out;
  for(std::size_t i=0; i<rhs.vals->size(); i++)
    out<<"(" <<i <<":" <<(*(rhs.vals))[i] <<")\n";
  return out;
}
// this is ugly, but I need to code quickly
// so I'm doing this to avoid adding hdf5 support here
inline boost_SMVector<T>* getVector() const { return vals; }
inline iterator begin() { assert(vals!=NULL); return vals->begin(); }
inline const_iterator begin() const { assert(vals!=NULL); return vals->begin(); }
inline const_iterator end() const { assert(vals!=NULL); return vals->end(); }
inline iterator end() { assert(vals!=NULL); return vals->end(); }
inline T& back() { assert(vals!=NULL); return vals->back(); }
boost::interprocess::interprocess_mutex* getMutex()
{
return mutex;
}
private:
boost::interprocess::interprocess_mutex *mutex;
boost_SMVector<T> *vals;
boost_SMVector<unsigned char> *share_buff;
bool head;
std::string ID;
bool SMallocated;
uint64_t memory=0;
boost::interprocess::managed_shared_memory *segment;
ShmemAllocator<T> *alloc_T;
ShmemAllocator<boost::interprocess::interprocess_mutex> *alloc_mutex;
ShmemAllocator<unsigned char> *alloc_uchar;
// using MPI for barrier calls until I find solution
MPI_Comm comm;
// Helper whose destructor removes the named shared-memory object when armed (head == true).
// Fix: the constructor used to test `head` before any value had been assigned to it,
// i.e. it read an uninitialized bool and could attempt a removal with an empty name.
// The object now starts disarmed; the owner arms it by assigning head and ID.
struct shm_remove
{
  bool head;
  std::string ID;
  shm_remove():head(false),ID("NULL") {
  }
  ~shm_remove(){
    if(head) boost::interprocess::shared_memory_object::remove(ID.c_str());
  }
} remover;
};
}
#endif

1173
src/AFQMC/Matrix/SMSparseMatrix.h Executable file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,668 @@
#ifndef QMCPLUSPLUS_AFQMC_SMSPARSEMATRIX_H
#define QMCPLUSPLUS_AFQMC_SMSPARSEMATRIX_H
#include<iostream>
#include<vector>
#include<tuple>
#include<complex>
#include<assert.h>
#include<algorithm>
#include"AFQMC/config.0.h"
#define ASSERT_SPARSEMATRIX
#if defined(USE_EIGEN)
namespace qmcplusplus
{
}
#else // In this case, use OhhmsPETE and your sparse matrix class
namespace qmcplusplus
{
// class that implements a sparse matrix in CSR format
template<class T>
class SMSparseMatrix
{
public:
template<typename spT> using ShmemAllocator<spT> = boost::interprocess::allocator<spT, boost::interprocess::managed_shared_memory::segment_manager>;
template<typename spT> using SMVector = boost::interprocess::vector<spT, ShmemAllocator<spT>>;
typedef T Type_t;
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef const int* const_indxPtr;
typedef int* indxPtr;
typedef typename SMVector<T>::iterator iterator;
typedef typename SMVector<T>::const_iterator const_iterator;
typedef SMSparseMatrix<T> This_t;
SMSparseMatrix<T>():nr(0),nc(0),compressed(false),zero_based(true)
{
}
SMSparseMatrix<T>(int n):nr(n),nc(n),compressed(false),zero_based(true)
{
}
SMSparseMatrix<T>(int n,int m):nr(n),nc(m),compressed(false),zero_based(true)
{
}
~SMSparseMatrix<T>()
{
}
// Copy constructor: copies the dimensions and flags of rhs and, on the head process,
// the contents of its four arrays.
// Fixes: "rhs.(*vals)" is not valid syntax - the arrays are reached as *(rhs.vals);
// zero_based is now taken from rhs (as operator= does) so the flag matches the copied indices.
// NOTE(review): head and the array pointers are not initialized by this constructor before
// being used - confirm how they are meant to be set up for a copied object.
SMSparseMatrix(const SMSparseMatrix<T> &rhs)
{
  compressed=rhs.compressed;
  zero_based=rhs.zero_based;
  nr=rhs.nr;
  nc=rhs.nc;
  if(!head) return;
  *vals=*(rhs.vals);
  *myrows=*(rhs.myrows);
  *colms=*(rhs.colms);
  *rowIndex=*(rhs.rowIndex);
}
inline void reserve(int n)
{
if(!head) return;
vals->reserve(n);
myrows->reserve(n);
colms->reserve(n);
rowIndex->reserve(nr+1);
}
inline void resize_arrays(int nnz)
{
if(!head) return;
vals->resize(nnz);
myrows->resize(nnz);
colms->resize(nnz);
rowIndex->resize(nr+1);
}
inline void clear() {
compressed=false;
zero_based=true;
if(!head) return;
vals->clear();
colms->clear();
myrows->clear();
rowIndex->clear();
}
inline void setDims(int n, int m)
{
nr=n;
nc=m;
compressed=false;
zero_based=true;
clear();
}
inline void setCompressed()
{
compressed=true;
}
inline bool isCompressed() const
{
return compressed;
}
inline int size() const
{
return vals->size();
}
inline int rows() const
{
return nr;
}
inline int cols() const
{
return nc;
}
inline const_pointer values() const
{
return &((*vals)[0]);
}
inline pointer values()
{
return &((*vals)[0]);
}
inline const_indxPtr column_data() const
{
return &((*colms)[0]);
}
inline indxPtr column_data()
{
return &((*colms)[0]);
}
inline const_indxPtr row_data() const
{
return &((*myrows)[0]);
}
inline indxPtr row_data()
{
return &((*myrows)[0]);
}
inline const_indxPtr row_index() const
{
return &((*rowIndex)[0]);
}
inline indxPtr row_index()
{
return &((*rowIndex)[0]);
}
// Copy assignment: copies dimensions and flags from rhs; on the head process the contents
// of the four arrays are copied as well. Returns *this.
// Fixes: "rhs.(*vals)" is not valid syntax; the function returned nothing on either path
// although it is declared to return a reference.
inline This_t& operator=(const SMSparseMatrix<T> &rhs)
{
  compressed=rhs.compressed;
  zero_based=rhs.zero_based;
  nr=rhs.nr;
  nc=rhs.nc;
  if(!head) return *this;
  (*vals)=*(rhs.vals);
  (*myrows)=*(rhs.myrows);
  (*colms)=*(rhs.colms);
  (*rowIndex)=*(rhs.rowIndex);
  return *this;
}
inline int find_element(int i, int j) {
return 0;
}
inline Type_t& operator()(int i, int j)
{
#ifdef ASSERT_SPARSEMATRIX
assert(i>=0 && i<nr && j>=0 && j<nc && compressed);
#endif
return (*vals)[find_element(i,j)];
}
inline Type_t operator()( int i, int j) const
{
#ifdef ASSERT_SPARSEMATRIX
assert(i>=0 && i<nr && j>=0 && j<nc && compressed);
#endif
return (*vals)[find_element(i,j)];
}
// Append the triplet (i,j,v) to the uncompressed storage. The matrix is marked as not
// compressed on every process; only the head process stores the entry.
inline void add(const int i, const int j, const T& v)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<nr && j>=0 && j<nc);
#endif
  compressed=false;
  if(head) {
    myrows->push_back(i);
    colms->push_back(j);
    vals->push_back(v);
  }
}
// Remove duplicated (row,column) entries from the triplet storage, keeping the first
// occurrence. Returns false (after logging both entries) when two entries share the same
// indices but carry different values; returns true otherwise. Quadratic in the number of
// entries. Non-head processes only clear the compressed flag.
// Fixes:
//   - the arrays are shared-memory vectors reached through pointers, so they cannot be
//     traversed with std::vector<int>::iterator nor indexed as colms[kj] / vals[kj];
//   - after erasing an entry the scan continues at the same position (the element that
//     moved into the freed slot used to be skipped).
inline bool remove_repeated()
{
#ifdef ASSERT_SPARSEMATRIX
  assert(myrows->size() == colms->size() && myrows->size() == vals->size());
#endif
  compressed=false;
  if(!head) return true;
  for(std::size_t ki=0; ki<myrows->size(); ki++)
  {
    std::size_t kj = ki+1;
    while(kj < myrows->size())
    {
      const bool same_position = (*myrows)[ki] == (*myrows)[kj] && (*colms)[ki] == (*colms)[kj];
      if(!same_position) {
        kj++;
        continue;
      }
      if((*vals)[ki] != (*vals)[kj]) {
        app_error()<<" Error in call to SMSparseMatrix::remove_repeated. Same indexes with different values. \n";
        app_error()<<"i: ri, ci, vi: "
                   <<ki <<" "
                   <<(*myrows)[ki] <<" "
                   <<(*colms)[ki] <<" "
                   <<(*vals)[ki] <<"\n"
                   <<"j: rj, cj, vj: "
                   <<kj <<" "
                   <<(*myrows)[kj] <<" "
                   <<(*colms)[kj] <<" "
                   <<(*vals)[kj] <<std::endl;
        return false;
      }
      // drop the duplicate from all three arrays; kj now refers to the next candidate
      myrows->erase( myrows->begin()+kj );
      colms->erase( colms->begin()+kj );
      vals->erase( vals->begin()+kj );
    }
  }
  return true;
}
inline void compress_old()
{
#ifdef ASSERT_SPARSEMATRIX
assert(myrows->size() == colms->size() && myrows->size() == vals->size());
#endif
if(!head) { compressed=true; return; }
// This is not efficient. Write your own iterator to swap all arrays simultaneously during sort
// Simple options for now:
// 1. use memory and efficient std::sort
// 2. no memory but my inefficient algorithm???
// Using #1 for now!!!
// order along myrows
int n=myrows->size();
std::vector<std::tuple<int,int> > toSort;
toSort.reserve(n);
for(int i=0; i<n; i++) toSort.push_back(std::forward_as_tuple(myrows[i],i));
std::sort(toSort.begin(),toSort.end());
std::vector<T> tmp;
tmp=vals;
myrows=colms;
for(int i=0; i<n; i++) {
int k=std::get<1>(toSort[i]);
colms[i] = myrows[k];
vals[i] = tmp[k];
}
for(int i=0; i<n; i++)
myrows[i] = std::get<0>(toSort[i]);
if(!std::is_sorted(myrows->begin(),myrows->end()))
std::cout<<"ERROR: list is not sorted. \n" <<std::endl;
// define rowIndex
rowIndex->resize(nr+1);
int curr=-1;
for(int n=0; n<myrows->size(); n++) {
if( myrows[n] != curr ) {
int old = curr;
curr = myrows[n];
for(int i=old+1; i<=curr; i++) rowIndex[i] = n;
}
}
for(int i=myrows->back()+1; i<rowIndex->size(); i++)
rowIndex[i] = vals->size();
// order within each rowIndex block
for(int k=0; k<nr; k++) {
if(rowIndex[k] == rowIndex[k+1]) continue;
toSort.clear();
tmp.clear();
for(int i=rowIndex[k],p=0; i<rowIndex[k+1]; i++,p++) toSort.push_back(std::forward_as_tuple(colms[i],p));
for(int i=rowIndex[k]; i<rowIndex[k+1]; i++) tmp.push_back(vals[i]);
std::sort(toSort.begin(),toSort.end());
for(int i=rowIndex[k],p=0; i<rowIndex[k+1]; i++,p++) {
colms[i] = std::get<0>(toSort[p]);
vals[i] = tmp[std::get<1>(toSort[p])];
}
}
compressed=true;
}
inline void compress()
{
#ifdef ASSERT_SPARSEMATRIX
assert(myrows->size() == colms->size() && myrows->size() == vals->size());
#endif
if(!head) { compressed=true; return; }
// order along myrows
int n=myrows->size();
sort_rows(0,n-1);
if(!std::is_sorted(myrows->begin(),myrows->end()))
std::cout<<"ERROR: list is not sorted. \n" <<std::endl;
// define rowIndex
rowIndex->resize(nr+1);
int curr=-1;
for(int n=0; n<myrows->size(); n++) {
if( myrows[n] != curr ) {
int old = curr;
curr = myrows[n];
for(int i=old+1; i<=curr; i++) rowIndex[i] = n;
}
}
for(int i=myrows->back()+1; i<rowIndex->size(); i++)
rowIndex[i] = vals->size();
// order within each rowIndex block
for(int k=0; k<nr; k++) {
if(rowIndex[k] == rowIndex[k+1]) continue;
sort_colms(rowIndex[k],rowIndex[k+1]-1);
}
compressed=true;
}
// In-place recursive quicksort of the entries in positions [left,right] by row index.
// Column indices and values are swapped together with the rows so triplets stay intact.
// The caller must pass a non-empty range (left <= right within the arrays).
// Fix: the arrays are reached through pointers, so elements must be accessed as
// (*myrows)[i] etc.; indexing the pointer itself does not address an element.
void sort_rows(int left, int right) {
  int i = left, j = right;
  auto pivot = (*myrows)[(left + right) / 2];
  /* partition */
  while (i <= j) {
    while ((*myrows)[i] < pivot)
      i++;
    while ((*myrows)[j] > pivot)
      j--;
    if (i <= j) {
      std::swap((*myrows)[i],(*myrows)[j]);
      std::swap((*colms)[i],(*colms)[j]);
      std::swap((*vals)[i],(*vals)[j]);
      i++;
      j--;
    }
  }
  /* recursion */
  if (left < j)
    sort_rows(left, j);
  if (i < right)
    sort_rows(i, right);
}
// In-place recursive quicksort of the entries in positions [left,right] by column index.
// Values are swapped together with the columns; row indices are left untouched, so the
// range is expected to lie within a single row. The range must be non-empty.
// Fix: the arrays are reached through pointers, so elements must be accessed as
// (*colms)[i] and (*vals)[i].
void sort_colms(int left, int right) {
  int i = left, j = right;
  auto pivot = (*colms)[(left + right) / 2];
  /* partition */
  while (i <= j) {
    while ((*colms)[i] < pivot)
      i++;
    while ((*colms)[j] > pivot)
      j--;
    if (i <= j) {
      std::swap((*colms)[i],(*colms)[j]);
      std::swap((*vals)[i],(*vals)[j]);
      i++;
      j--;
    }
  }
  /* recursion */
  if (left < j)
    sort_colms(left, j);
  if (i < right)
    sort_colms(i, right);
}
inline void initFroms1D(std::vector<std::tuple<IndexType,RealType> >& V, bool sorted)
{
#ifdef ASSERT_SPARSEMATRIX
assert(nr==1);
#endif
if(!head) { compressed=true; return; }
if(!sorted)
//std::sort(V.begin(),V.end(),my_sort);
std::sort(V.begin(),V.end(), [](const std::tuple<IndexType,RealType> >& lhs, const std::tuple<IndexType,RealType> >& rhs){return (bool)(std::get<0>(lhs) < std::get<0>(rhs)} );
myrows->clear();
rowIndex->clear();
vals->clear();
colms->clear();
int nnz=V.size();
myrows->resize(nnz);
vals->resize(nnz);
colms->resize(nnz);
rowIndex->resize(nr+1);
rowIndex[0]=0;
for(int i=0; i<V.size(); i++) {
if( std::is_same<T,std::complex<double> >::value ) {
vals[i] = complex<double>(std::get<1>(V[i]),0.0);
} else {
vals[i] = static_cast<T>(std::get<1>(V[i]));
}
myrows[i] = 0;
colms[i] = std::get<0>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nc);
#endif
}
rowIndex[1]=V.size();
compressed=true;
}
// Build a single-row matrix (nr must be 1) from a list of (column, complex value) pairs.
// When sorted is false the list is first ordered by column (V is modified in place).
// The head process fills the arrays and rowIndex = {0, nnz}; every process marks the
// matrix as compressed.
// Fixes: the sort lambda was malformed and declared the wrong element type (real instead
// of complex values); array elements are reached through the pointers; std::complex is
// spelled with its namespace.
// NOTE(review): the value assignment only compiles when T can be assigned from
// std::complex<RealType>; for any other T the run-time check below asserts.
inline void initFroms1D(std::vector<s1D<std::complex<RealType> > >& V, bool sorted)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(nr==1);
#endif
  if(!head) { compressed=true; return; }
  if(!sorted)
    std::sort(V.begin(),V.end(),
              [](const s1D<std::complex<RealType> >& lhs, const s1D<std::complex<RealType> >& rhs)
              { return std::get<0>(lhs) < std::get<0>(rhs); } );
  const int nnz=V.size();
  myrows->clear();
  rowIndex->clear();
  vals->clear();
  colms->clear();
  myrows->resize(nnz);
  vals->resize(nnz);
  colms->resize(nnz);
  rowIndex->resize(nr+1);
  (*rowIndex)[0]=0;
  for(int i=0; i<nnz; i++) {
    if( std::is_same<T,std::complex<double> >::value ) {
      (*vals)[i] = std::get<1>(V[i]);
    } else {
      assert(false);
    }
    (*myrows)[i] = 0;
    (*colms)[i] = std::get<0>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nc);
#endif
  }
  (*rowIndex)[1]=nnz;
  compressed=true;
}
// Build the CSR arrays from a list of (row, column, complex value) triplets.
// When sorted is false the list is first ordered by row and then by column (V is modified
// in place). The head process fills the arrays and rowIndex; every process marks the matrix
// as compressed.
// Fixes: the sort lambda was malformed and declared a two-field real tuple instead of the
// element type of V; array elements are reached through the pointers; an empty list no
// longer calls back() on an empty array; std::complex is spelled with its namespace.
// Ordering by column within a row is added so the result matches what compress() produces.
// NOTE(review): the value assignment only compiles when T can be assigned from
// std::complex<RealType>; for any other T the run-time check below asserts.
inline void initFroms2D(std::vector<s2D<std::complex<RealType> > >& V, bool sorted)
{
  if(!head) { compressed=true; return; }
  if(!sorted)
    std::sort(V.begin(),V.end(),
              [](const s2D<std::complex<RealType> >& lhs, const s2D<std::complex<RealType> >& rhs) -> bool
              {
                if(std::get<0>(lhs) != std::get<0>(rhs)) return std::get<0>(lhs) < std::get<0>(rhs);
                return std::get<1>(lhs) < std::get<1>(rhs);
              } );
  const int nnz=V.size();
  myrows->clear();
  rowIndex->clear();
  vals->clear();
  colms->clear();
  myrows->resize(nnz);
  vals->resize(nnz);
  colms->resize(nnz);
  rowIndex->resize(nr+1);
  if(nnz == 0) {
    // no entries: every row is empty
    std::fill(rowIndex->begin(),rowIndex->end(),0);
    compressed=true;
    return;
  }
  for(int i=0; i<nnz; i++) {
    if( std::is_same<T,std::complex<double> >::value ) {
      (*vals)[i] = std::get<2>(V[i]);
    } else {
      assert(false);
    }
    (*myrows)[i] = std::get<0>(V[i]);
    (*colms)[i] = std::get<1>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nr);
    assert(std::get<1>(V[i]) >= 0 && std::get<1>(V[i]) < nc);
#endif
  }
  // every row up to and including a newly seen row starts at position n
  int curr=-1;
  for(int n=0; n<nnz; n++) {
    if( (*myrows)[n] != curr ) {
      int old = curr;
      curr = (*myrows)[n];
      for(int i=old+1; i<=curr; i++) (*rowIndex)[i] = n;
    }
  }
  // rows after the last populated one (and the end marker) point past the data
  for(int i=myrows->back()+1; i<=nr; i++)
    (*rowIndex)[i] = nnz;
  compressed=true;
}
// Build the CSR arrays from a list of (row, column, real value) triplets.
// When sorted is false the list is first ordered by row and then by column (V is modified
// in place). The head process fills the arrays and rowIndex; every process marks the matrix
// as compressed.
// Fixes: the sort lambda was malformed and declared a two-field tuple instead of the
// element type of V; array elements are reached through the pointers; an empty list no
// longer calls back() on an empty array; the value conversion uses static_cast<T> for every
// T, which also covers complex T without an assignment that fails to compile for real T.
// Ordering by column within a row is added so the result matches what compress() produces.
inline void initFroms2D(std::vector<s2D<RealType> >& V, bool sorted)
{
  if(!head) { compressed=true; return; }
  if(!sorted)
    std::sort(V.begin(),V.end(),
              [](const s2D<RealType>& lhs, const s2D<RealType>& rhs) -> bool
              {
                if(std::get<0>(lhs) != std::get<0>(rhs)) return std::get<0>(lhs) < std::get<0>(rhs);
                return std::get<1>(lhs) < std::get<1>(rhs);
              } );
  const int nnz=V.size();
  myrows->clear();
  rowIndex->clear();
  vals->clear();
  colms->clear();
  myrows->resize(nnz);
  vals->resize(nnz);
  colms->resize(nnz);
  rowIndex->resize(nr+1);
  if(nnz == 0) {
    // no entries: every row is empty
    std::fill(rowIndex->begin(),rowIndex->end(),0);
    compressed=true;
    return;
  }
  for(int i=0; i<nnz; i++) {
    (*vals)[i] = static_cast<T>(std::get<2>(V[i]));
    (*myrows)[i] = std::get<0>(V[i]);
    (*colms)[i] = std::get<1>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nr);
    assert(std::get<1>(V[i]) >= 0 && std::get<1>(V[i]) < nc);
#endif
  }
  // every row up to and including a newly seen row starts at position n
  int curr=-1;
  for(int n=0; n<nnz; n++) {
    if( (*myrows)[n] != curr ) {
      int old = curr;
      curr = (*myrows)[n];
      for(int i=old+1; i<=curr; i++) (*rowIndex)[i] = n;
    }
  }
  // rows after the last populated one (and the end marker) point past the data
  for(int i=myrows->back()+1; i<=nr; i++)
    (*rowIndex)[i] = nnz;
  compressed=true;
}
// Sanity check: report (on std::cout) every position where rowIndex decreases.
// Does nothing when the `head` flag is not set.
inline void check()
{
  if(!head) return;
  // i+1 < size avoids the unsigned wrap-around of size()-1 on an empty container;
  // rowIndex is a pointer, so it is dereferenced before indexing.
  for(int i=0; i+1<static_cast<int>(rowIndex->size()); i++)
  {
    if((*rowIndex)[i+1] < (*rowIndex)[i]) std::cout<<"Error: SMSparseMatrix::check(): rowIndex-> \n" <<std::endl;
  }
}
// Scale every stored value by a real factor and return *this.
// When the `head` flag is not set the values are left untouched.
inline SMSparseMatrix<T>& operator*=(const RealType rhs )
{
  if(!head) return *this;   // a reference must always be returned
  for(iterator it=vals->begin(); it!=vals->end(); it++)
    (*it) *= rhs;
  return *this;
}
// Scale every stored value by a complex factor and return *this.
// When the `head` flag is not set the values are left untouched.
inline SMSparseMatrix<T>& operator*=(const std::complex<RealType> rhs )
{
  if(!head) return *this;   // a reference must always be returned
  for(iterator it=vals->begin(); it!=vals->end(); it++)
    (*it) *= rhs;
  return *this;
}
// Switch the index arrays to zero-based numbering by decrementing every entry of
// colms, myrows and rowIndex. No-op when already zero-based or when `head` is not set.
inline void toZeroBase() {
  if(!head) return;
  if(zero_based) return;
  zero_based=true;
  // The members are pointers to the containers; iterate over the pointees.
  for (int& i : *colms ) i--;
  for (int& i : *myrows ) i--;
  for (int& i : *rowIndex ) i--;
}
// Switch the index arrays to one-based numbering by incrementing every entry of
// colms, myrows and rowIndex. No-op when already one-based or when `head` is not set.
inline void toOneBase() {
  if(!head) return;
  if(!zero_based) return;
  zero_based=false;
  // The members are pointers to the containers; iterate over the pointees.
  for (int& i : *colms ) i++;
  for (int& i : *myrows ) i++;
  for (int& i : *rowIndex ) i++;
}
// Print every stored element as "(row,column:value)", one per line.
friend std::ostream& operator<<(std::ostream& out, const SMSparseMatrix<T>& rhs)
{
  const int nnz = static_cast<int>(rhs.vals->size());
  // The arrays are held through pointers: dereference before indexing.
  for(int i=0; i<nnz; i++)
    out<<"(" <<(*rhs.myrows)[i] <<"," <<(*rhs.colms)[i] <<":" <<(*rhs.vals)[i] <<")\n";
  return out;
}
// Read one "row column value" triplet from the stream and append it to the matrix.
// Nothing is read when rhs.head is not set; nothing is appended when the read fails.
friend std::istream& operator>>(std::istream& in, SMSparseMatrix<T>& rhs)
{
  // A friend function has no implicit object: the flag must be read through rhs,
  // and the stream must be returned on every path.
  if(!rhs.head) return in;
  T v;
  int c,r;
  if(in>>r >>c >>v) {
    rhs.vals->push_back(v);
    rhs.myrows->push_back(r);
    rhs.colms->push_back(c);
  }
  return in;
}
// this is ugly, but I need to code quickly
// so I'm doing this to avoid adding hdf5 support here
// Raw access to the underlying containers (values, row indices, column indices,
// row offsets). The stored pointers are returned unchanged.
SMVector<T>* getVals()
{
  return vals;
}
SMVector<int>* getRows()
{
  return myrows;
}
SMVector<int>* getCols()
{
  return colms;
}
SMVector<int>* getRowIndex()
{
  return rowIndex;
}
// Rebuild the per-element row array (myrows) from the row offsets (rowIndex).
// Does nothing when the `head` flag is not set.
void setRowsFromRowIndex()
{
  if(!head) return;
  // toOneBase() increments the entries of rowIndex as well, so in one-based mode the
  // stored offsets must be shifted back before they are used as array positions.
  const int shift = zero_based?0:1;
  myrows->resize(vals->size());
  for(int i=0; i<nr; i++)
    for(int j=(*rowIndex)[i]-shift; j<(*rowIndex)[i+1]-shift; j++)
      (*myrows)[j]=i+shift;
}
private:
bool compressed;
int nr,nc;
SMVector<T> *vals;
SMVector<int> *colms,*myrows,*rowIndex;
bool zero_based;
bool head;
std::string ID;
//_mySort_snD_ my_sort;
boost::interprocess::managed_shared_memory *segment;
ShmemAllocator<T> *alloc_T;
ShmemAllocator<int> *alloc_int;
// Helper member that calls shared_memory_object::remove() on the name in `ID`, both when
// it is constructed and when it is destroyed.
// NOTE(review): `ID` is not declared in this struct; as written it names the enclosing
// class's non-static member, which a nested struct cannot use without an object - confirm
// whether this struct needs its own ID member.
// NOTE(review): `head` is declared here but never initialised or read in this struct.
struct shm_remove
{
bool head;
shm_remove() {
boost::interprocess::shared_memory_object::remove(ID.c_str());
}
~shm_remove(){
boost::interprocess::shared_memory_object::remove(ID.c_str());
}
} remover;
};
}
#endif
#endif

707
src/AFQMC/Matrix/SparseMatrix.h Executable file
View File

@ -0,0 +1,707 @@
#ifndef QMCPLUSPLUS_AFQMC_SPARSEMATRIX_H
#define QMCPLUSPLUS_AFQMC_SPARSEMATRIX_H
#include<iostream>
#include<vector>
#include<tuple>
#include<assert.h>
#include<algorithm>
#include"AFQMC/config.0.h"
#define ASSERT_SPARSEMATRIX
#if defined(USE_EIGEN)
namespace qmcplusplus
{
}
#else // In this case, use OhhmsPETE and your sparse matrix class
namespace qmcplusplus
{
// class that implements a sparse matrix in CSR format
template<class T>
class SparseMatrix
{
public:
typedef T Type_t;
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef const int* const_indxPtr;
typedef int* indxPtr;
typedef typename std::vector<T>::iterator iterator;
typedef typename std::vector<T>::const_iterator const_iterator;
typedef SparseMatrix<T> This_t;
SparseMatrix<T>():vals(),colms(),myrows(),rowIndex(),nr(0),nc(0),compressed(false),zero_based(true)
{
}
SparseMatrix<T>(int n):vals(),colms(),myrows(),rowIndex(),nr(n),nc(n),compressed(false),zero_based(true)
{
}
SparseMatrix<T>(int n,int m):vals(),colms(),myrows(),rowIndex(),nr(n),nc(m),compressed(false),zero_based(true)
{
}
~SparseMatrix<T>()
{
}
SparseMatrix(const SparseMatrix<T> &rhs)
{
compressed=rhs.compressed;
zero_based=true;
nr=rhs.nr;
nc=rhs.nc;
vals=rhs.vals;
myrows=rhs.myrows;
colms=rhs.colms;
rowIndex=rhs.rowIndex;
}
// Reserve capacity for n stored elements (and nr+1 row offsets) without changing sizes.
void reserve(int n)
{
  rowIndex.reserve(nr+1);
  colms.reserve(n);
  myrows.reserve(n);
  vals.reserve(n);
}
// Same as reserve(n); always reports success.
bool allocateMemoryAndReserve(int n)
{
  reserve(n);
  return true;
}
// Nothing to do for this matrix type; always reports success.
bool initializeChildren()
{
  return true;
}
// Resize the three per-element arrays to nnz entries and the row offsets to nr+1 entries.
void resize_arrays(int nnz)
{
  rowIndex.resize(nr+1);
  colms.resize(nnz);
  myrows.resize(nnz);
  vals.resize(nnz);
}
// Drop all stored elements and reset the flags (uncompressed, zero-based).
// The dimensions nr/nc are kept.
void clear()
{
  compressed=false;
  zero_based=true;
  rowIndex.clear();
  myrows.clear();
  colms.clear();
  vals.clear();
}
// Set the dimensions to n rows by m columns and empty the matrix.
void setDims(int n, int m)
{
  nr=n;
  nc=m;
  clear();   // also resets compressed and zero_based
}
// Mark the matrix as compressed without touching the arrays.
void setCompressed() { compressed=true; }
// True when the matrix is flagged as compressed.
bool isCompressed() const { return compressed; }
// Number of stored elements.
int size() const { return vals.size(); }
// Number of rows / columns.
int rows() const { return nr; }
int cols() const { return nc; }
// Raw pointers into the value array.
const_pointer values() const { return vals.data(); }
pointer values() { return vals.data(); }
// Raw pointers into the column-index array.
const_indxPtr column_data() const { return colms.data(); }
indxPtr column_data() { return colms.data(); }
// Raw pointers into the per-element row-index array.
const_indxPtr row_data() const { return myrows.data(); }
indxPtr row_data() { return myrows.data(); }
// Raw pointers into the row-offset array.
const_indxPtr row_index() const { return rowIndex.data(); }
indxPtr row_index() { return rowIndex.data(); }
// Copy assignment: duplicates dimensions, flags and all arrays of rhs and returns *this.
inline This_t& operator=(const SparseMatrix<T> &rhs)
{
  compressed=rhs.compressed;
  zero_based=rhs.zero_based;
  storage_format=rhs.storage_format;
  max_in_row=rhs.max_in_row;
  nr=rhs.nr;
  nc=rhs.nc;
  vals=rhs.vals;
  myrows=rhs.myrows;
  colms=rhs.colms;
  rowIndex=rhs.rowIndex;
  return *this;   // the original fell off the end of a non-void function
}
inline int find_element(int i, int j) {
return 0;
}
// Element access by (row, column). With ASSERT_SPARSEMATRIX defined, the indices must be
// in range and the matrix compressed. The storage position comes from find_element().
Type_t& operator()(int i, int j)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<nr && j>=0 && j<nc && compressed);
#endif
  const int pos = find_element(i,j);
  return vals[pos];
}
// Read-only variant: returns a copy of the stored value.
Type_t operator()( int i, int j) const
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<nr && j>=0 && j<nc && compressed);
#endif
  const int pos = find_element(i,j);
  return vals[pos];
}
// Append element (i,j) with value v at the end of the arrays and mark the matrix as
// uncompressed. No check for an existing (i,j) entry is made. `dummy` is unused.
void add(const int i, const int j, const T& v, bool dummy=false)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<nr && j>=0 && j<nc);
#endif
  vals.push_back(v);
  colms.push_back(j);
  myrows.push_back(i);
  compressed=false;
}
// Remove duplicated (row, column) entries, keeping the first occurrence of each.
// Returns false (after reporting through app_error()) when two entries share the same
// indices but hold different values; returns true otherwise.
// The matrix is marked as uncompressed.
inline bool remove_repeated()
{
#ifdef ASSERT_SPARSEMATRIX
  assert(myrows.size() == colms.size() && myrows.size() == vals.size());
#endif
  compressed=false;
  for(int ki=0; ki<static_cast<int>(myrows.size()); ++ki)
  {
    int kj = ki+1;
    while(kj<static_cast<int>(myrows.size()))
    {
      if( myrows[ki] == myrows[kj] && colms[ki] == colms[kj] ) {
        if(vals[ki] != vals[kj]) {
          app_error()<<" Error in call to SparseMatrix::remove_repeated. Same indexes with different values. \n";
          app_error()<<"i: ri, ci, vi: "
                     <<ki <<" "
                     <<myrows[ki] <<" "
                     <<colms[ki] <<" "
                     <<vals[ki] <<"\n"
                     <<"j: rj, cj, vj: "
                     <<kj <<" "
                     <<myrows[kj] <<" "
                     <<colms[kj] <<" "
                     <<vals[kj] <<std::endl;
          return false;
        }
        // Erasing shifts the next candidate into position kj, so kj must NOT advance here:
        // the original iterator loop incremented after erase(), skipping that element and
        // stepping past end() when the last element was the one erased.
        myrows.erase( myrows.begin()+kj );
        colms.erase( colms.begin()+kj );
        vals.erase( vals.begin()+kj );
      } else {
        ++kj;
      }
    }
  }
  return true;
}
inline void compress_old()
{
#ifdef ASSERT_SPARSEMATRIX
assert(myrows.size() == colms.size() && myrows.size() == vals.size());
#endif
// This is not efficient. Write your own iterator to swap all arrays simultaneously during sort
// Simple options for now:
// 1. use memory and efficient std::sort
// 2. no memory but my inefficient algorithm???
// Using #1 for now!!!
// order along myrows
int n=myrows.size();
std::vector<std::tuple<int,int> > toSort;
toSort.reserve(n);
for(int i=0; i<n; i++) toSort.push_back(std::forward_as_tuple(myrows[i],i));
std::sort(toSort.begin(),toSort.end());
std::vector<T> tmp;
tmp=vals;
myrows=colms;
for(int i=0; i<n; i++) {
int k=std::get<1>(toSort[i]);
colms[i] = myrows[k];
vals[i] = tmp[k];
}
for(int i=0; i<n; i++)
myrows[i] = std::get<0>(toSort[i]);
if(!std::is_sorted(myrows.begin(),myrows.end()))
std::cout<<"ERROR: list is not sorted. \n" <<std::endl;
// define rowIndex
rowIndex.resize(nr+1);
int curr=-1;
for(int n=0; n<myrows.size(); n++) {
if( myrows[n] != curr ) {
int old = curr;
curr = myrows[n];
for(int i=old+1; i<=curr; i++) rowIndex[i] = n;
}
}
for(int i=myrows.back()+1; i<rowIndex.size(); i++)
rowIndex[i] = vals.size();
// order within each rowIndex block
for(int k=0; k<nr; k++) {
if(rowIndex[k] == rowIndex[k+1]) continue;
toSort.clear();
tmp.clear();
for(int i=rowIndex[k],p=0; i<rowIndex[k+1]; i++,p++) toSort.push_back(std::forward_as_tuple(colms[i],p));
for(int i=rowIndex[k]; i<rowIndex[k+1]; i++) tmp.push_back(vals[i]);
std::sort(toSort.begin(),toSort.end());
for(int i=rowIndex[k],p=0; i<rowIndex[k+1]; i++,p++) {
colms[i] = std::get<0>(toSort[p]);
vals[i] = tmp[std::get<1>(toSort[p])];
}
}
compressed=true;
}
inline void compress()
{
#ifdef ASSERT_SPARSEMATRIX
assert(myrows.size() == colms.size() && myrows.size() == vals.size());
#endif
// order along myrows
int n=myrows.size();
sort_rows(0,n-1);
if(!std::is_sorted(myrows.begin(),myrows.end()))
std::cout<<"ERROR: list is not sorted. \n" <<std::endl;
// define rowIndex
rowIndex.resize(nr+1);
int curr=-1;
for(int n=0; n<myrows.size(); n++) {
if( myrows[n] != curr ) {
int old = curr;
curr = myrows[n];
for(int i=old+1; i<=curr; i++) rowIndex[i] = n;
}
}
for(int i=myrows.back()+1; i<rowIndex.size(); i++)
rowIndex[i] = vals.size();
// order within each rowIndex block
for(int k=0; k<nr; k++) {
if(rowIndex[k] == rowIndex[k+1]) continue;
sort_colms(rowIndex[k],rowIndex[k+1]-1);
}
compressed=true;
}
// Recursive in-place quicksort of the elements at positions [left, right] (both
// inclusive), ordered by row index. colms and vals are permuted together with myrows
// so each (row, column, value) triplet stays intact.
// The range must not be empty: the pivot is read from myrows[(left+right)/2]
// before any bounds are examined.
void sort_rows(int left, int right) {
int i = left, j = right;
// pivot is copied by value, so later swaps do not change it
auto pivot = myrows[(left + right) / 2];
/* partition: move rows smaller than the pivot to the left, larger ones to the right */
while (i <= j) {
while (myrows[i] < pivot)
i++;
while (myrows[j] > pivot)
j--;
if (i <= j) {
std::swap(myrows[i],myrows[j]);
std::swap(colms[i],colms[j]);
// the post-increment/decrement advances both cursors past the swapped pair
std::swap(vals[i++],vals[j--]);
}
};
/* recursion on the two remaining sub-ranges */
if (left < j)
sort_rows(left, j);
if (i < right)
sort_rows(i, right);
}
// Recursive in-place quicksort of the elements at positions [left, right] (both
// inclusive), ordered by column index. vals is permuted together with colms;
// myrows is not touched.
// The range must not be empty: the pivot is read from colms[(left+right)/2]
// before any bounds are examined.
void sort_colms(int left, int right) {
int i = left, j = right;
// pivot is copied by value, so later swaps do not change it
auto pivot = colms[(left + right) / 2];
/* partition: move columns smaller than the pivot to the left, larger ones to the right */
while (i <= j) {
while (colms[i] < pivot)
i++;
while (colms[j] > pivot)
j--;
if (i <= j) {
std::swap(colms[i],colms[j]);
// the post-increment/decrement advances both cursors past the swapped pair
std::swap(vals[i++],vals[j--]);
}
};
/* recursion on the two remaining sub-ranges */
if (left < j)
sort_colms(left, j);
if (i < right)
sort_colms(i, right);
}
// Transpose in place: exchange the row and column index of every stored element,
// exchange the dimensions, then re-compress.
void transpose()
{
  assert(myrows.size() == colms.size() && myrows.size() == vals.size());
  // The two index arrays have equal length (asserted above), so exchanging them
  // wholesale is the same as swapping them element by element.
  myrows.swap(colms);
  const int old_rows = nr;
  nr = nc;
  nc = old_rows;
  compress();
}
// Fill a single-row matrix from a list of (column, value) pairs holding real values.
//   V      : pairs, read with std::get<0>/<1> as column/value; sorted in place with
//            my_sort when `sorted` is false.
//   sorted : pass true when V is already ordered.
// With ASSERT_SPARSEMATRIX defined the matrix must have exactly one row.
inline void initFroms1D(std::vector<std::tuple<IndexType,RealType> >& V, bool sorted)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(nr==1);
#endif
  if(!sorted)
    std::sort(V.begin(),V.end(),my_sort);
  const int nnz=V.size();
  myrows.clear();
  rowIndex.clear();
  vals.clear();
  colms.clear();
  myrows.resize(nnz);
  vals.resize(nnz);
  colms.resize(nnz);
  rowIndex.resize(nr+1);
  rowIndex[0]=0;
  for(int i=0; i<nnz; i++) {
    // One conversion for every T: the previous run-time is_same branch still had to
    // compile `vals[i] = std::complex<double>(...)` for a real T, which it cannot.
    // For T = std::complex<double> the cast yields (value, 0), as before.
    vals[i] = static_cast<T>(std::get<1>(V[i]));
    myrows[i] = 0;
    colms[i] = std::get<0>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nc);
#endif
  }
  rowIndex[1]=V.size();
  compressed=true;
}
// Fill a single-row matrix from a list of (column, value) pairs holding complex values.
// V is sorted in place with my_sort unless `sorted` is true. When T is not
// std::complex<double> the routine hits assert(false) for every element.
inline void initFroms1D(std::vector<s1D<std::complex<RealType> > >& V, bool sorted)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(nr==1);
#endif
  if(!sorted)
    std::sort(V.begin(),V.end(),my_sort);
  const int count=V.size();
  // assign() gives the same result as clear() followed by resize():
  // `count` value-initialised entries.
  myrows.assign(count,0);
  vals.assign(count,T());
  colms.assign(count,0);
  rowIndex.assign(nr+1,0);
  rowIndex[0]=0;
  const bool is_complex = std::is_same<T,std::complex<double> >::value;
  for(int k=0; k<count; ++k) {
    if( is_complex ) {
      vals[k] = std::get<1>(V[k]);
    } else {
      assert(false);
    }
    myrows[k] = 0;
    colms[k] = std::get<0>(V[k]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[k]) >= 0 && std::get<0>(V[k]) < nc);
#endif
  }
  rowIndex[1]=V.size();
  compressed=true;
}
// Build the CSR arrays from a list of (row, column, value) triplets holding complex values.
//   V      : triplets, read with std::get<0>/<1>/<2> as row/column/value; sorted in place
//            with my_sort when `sorted` is false.
//   sorted : pass true when V is already ordered.
// When T is not std::complex<double> the routine hits assert(false) for every element.
inline void initFroms2D(std::vector<s2D<std::complex<RealType> > >& V, bool sorted)
{
  if(!sorted)
    std::sort(V.begin(),V.end(),my_sort);
  const int nnz=V.size();
  myrows.clear();
  rowIndex.clear();
  vals.clear();
  colms.clear();
  myrows.resize(nnz);
  vals.resize(nnz);
  colms.resize(nnz);
  rowIndex.resize(nr+1);
  for(int i=0; i<nnz; i++) {
    if( std::is_same<T,std::complex<double> >::value ) {
      vals[i] = std::get<2>(V[i]);
    } else {
      assert(false);
    }
    myrows[i] = std::get<0>(V[i]);
    colms[i] = std::get<1>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nr);
    assert(std::get<1>(V[i]) >= 0 && std::get<1>(V[i]) < nc);
#endif
  }
  // rowIndex[r] = position of the first element whose row is >= r
  int curr=-1;
  for(int n=0; n<nnz; n++) {
    if( myrows[n] != curr ) {
      const int old = curr;
      curr = myrows[n];
      for(int i=old+1; i<=curr; i++) rowIndex[i] = n;
    }
  }
  // Rows after the last populated one point at the end. With an empty V there is no
  // last row, so every offset is set (back() must not be called on an empty vector).
  const int last = myrows.empty() ? -1 : myrows.back();
  for(int i=last+1; i<static_cast<int>(rowIndex.size()); i++)
    rowIndex[i] = vals.size();
  compressed=true;
}
// Build the CSR arrays from a list of (row, column, value) triplets holding real values.
//   V      : triplets, read with std::get<0>/<1>/<2> as row/column/value; sorted in place
//            with my_sort when `sorted` is false.
//   sorted : pass true when V is already ordered.
inline void initFroms2D(std::vector<s2D<RealType> >& V, bool sorted)
{
  if(!sorted)
    std::sort(V.begin(),V.end(),my_sort);
  const int nnz=V.size();
  myrows.clear();
  rowIndex.clear();
  vals.clear();
  colms.clear();
  myrows.resize(nnz);
  vals.resize(nnz);
  colms.resize(nnz);
  rowIndex.resize(nr+1);
  for(int i=0; i<nnz; i++) {
    // One conversion for every T: the previous run-time is_same branch still had to
    // compile `vals[i] = std::complex<double>(...)` for a real T, which it cannot.
    // For T = std::complex<double> the cast yields (value, 0), as before.
    vals[i] = static_cast<T>(std::get<2>(V[i]));
    myrows[i] = std::get<0>(V[i]);
    colms[i] = std::get<1>(V[i]);
#ifdef ASSERT_SPARSEMATRIX
    assert(std::get<0>(V[i]) >= 0 && std::get<0>(V[i]) < nr);
    assert(std::get<1>(V[i]) >= 0 && std::get<1>(V[i]) < nc);
#endif
  }
  // rowIndex[r] = position of the first element whose row is >= r
  int curr=-1;
  for(int n=0; n<nnz; n++) {
    if( myrows[n] != curr ) {
      const int old = curr;
      curr = myrows[n];
      for(int i=old+1; i<=curr; i++) rowIndex[i] = n;
    }
  }
  // Rows after the last populated one point at the end. With an empty V there is no
  // last row, so every offset is set (back() must not be called on an empty vector).
  const int last = myrows.empty() ? -1 : myrows.back();
  for(int i=last+1; i<static_cast<int>(rowIndex.size()); i++)
    rowIndex[i] = vals.size();
  compressed=true;
}
// Sanity check: report (on std::cout) every position where rowIndex decreases.
inline void check()
{
  // i+1 < size avoids the unsigned wrap-around of size()-1 when rowIndex is empty.
  for(int i=0; i+1<static_cast<int>(rowIndex.size()); i++)
  {
    if(rowIndex[i+1] < rowIndex[i]) std::cout<<"Error: SparseMatrix::check(): rowIndex. \n" <<std::endl;
  }
}
// Scale every stored value by a real factor; returns *this.
SparseMatrix<T>& operator*=(const RealType rhs )
{
  for(T& entry : vals)
    entry *= rhs;
  return *this;
}
// Scale every stored value by a complex factor; returns *this.
SparseMatrix<T>& operator*=(const std::complex<RealType> rhs )
{
  for(T& entry : vals)
    entry *= rhs;
  return *this;
}
// Switch to zero-based numbering: every entry of colms, myrows and rowIndex is
// decremented by one. No-op when the matrix is already zero-based.
void toZeroBase()
{
  if(!zero_based) {
    zero_based=true;
    for(std::vector<int>::iterator it=colms.begin(); it!=colms.end(); ++it) --(*it);
    for(std::vector<int>::iterator it=myrows.begin(); it!=myrows.end(); ++it) --(*it);
    for(std::vector<int>::iterator it=rowIndex.begin(); it!=rowIndex.end(); ++it) --(*it);
  }
}
// Switch to one-based numbering: every entry of colms, myrows and rowIndex is
// incremented by one. No-op when the matrix is already one-based.
void toOneBase()
{
  if(zero_based) {
    zero_based=false;
    for(std::vector<int>::iterator it=colms.begin(); it!=colms.end(); ++it) ++(*it);
    for(std::vector<int>::iterator it=myrows.begin(); it!=myrows.end(); ++it) ++(*it);
    for(std::vector<int>::iterator it=rowIndex.begin(); it!=rowIndex.end(); ++it) ++(*it);
  }
}
/*
inline SparseMatrix<T>& operator+=(const SparseMatrix<T>& rhs )
{
this->DM += rhs.DM;
return *this;
}
inline SparseMatrix<T>& operator-=(const SparseMatrix<T>& rhs )
{
this->DM -= rhs.DM;
return *this;
}
inline SparseMatrix<T> operator+(const SparseMatrix<T>& opB) const {
return SparseMatrix<T>(*this) += opB;
}
inline SparseMatrix<T> operator-(const SparseMatrix<T>& opB) const {
return SparseMatrix<T>(*this) -= opB;
}
inline SparseMatrix<T> operator*(const SparseMatrix<T>& B) const {
//#ifdef()
// assert(this->colms() == B.myrows());
//#endif
SparseMatrix<T> C(this->myrows(),B.colms());
const char transa = 'N';
const char transb = 'N';
const double one=1.0;
const double zero=0.0;
// C = A*B -> fortran -> C' = B'*A',
BLAS::gemm(transa,transb, B.colms(), this->myrows(), this->colms(),
one, B.data(), B.colms(), this->data(), this->colms(),
zero, C.data(), this->colms());
return C;
}
*/
// Print every stored element as "(row,column:value)", one per line.
friend std::ostream& operator<<(std::ostream& out, const SparseMatrix<T>& rhs)
{
  int k=0;
  while(k<rhs.vals.size())
  {
    out<<"(" <<rhs.myrows[k] <<"," <<rhs.colms[k] <<":" <<rhs.vals[k] <<")\n";
    ++k;
  }
  return out;
}
// Read one "row column value" triplet from the stream and append it to the matrix.
// Nothing is appended when the read fails (the stream keeps its failure state), so an
// exhausted or malformed input can no longer add uninitialised data.
friend std::istream& operator>>(std::istream& in, SparseMatrix<T>& rhs)
{
  T v;
  int c,r;
  if(in>>r >>c >>v) {
    rhs.vals.push_back(v);
    rhs.myrows.push_back(r);
    rhs.colms.push_back(c);
  }
  return in;
}
// this is ugly, but I need to code quickly
// so I'm doing this to avoid adding hdf5 support here
// Direct access to the underlying containers (values, row indices, column indices,
// row offsets); each call returns the address of the corresponding member.
std::vector<T>* getVals()
{
  return &vals;
}
std::vector<int>* getRows()
{
  return &myrows;
}
std::vector<int>* getCols()
{
  return &colms;
}
std::vector<int>* getRowIndex()
{
  return &rowIndex;
}
// Rebuild the per-element row array (myrows) from the row offsets (rowIndex).
void setRowsFromRowIndex()
{
  // toOneBase() increments the entries of rowIndex as well, so in one-based mode the
  // stored offsets are shifted by one and must be corrected before they are used as
  // array positions (otherwise myrows[0] is never written and myrows[size] is).
  const int shift = zero_based?0:1;
  myrows.resize(vals.size());
  for(int i=0; i<nr; i++)
    for(int j=rowIndex[i]-shift; j<rowIndex[i+1]-shift; j++)
      myrows[j]=i+shift;
}
// True when the index arrays use zero-based numbering.
bool zero_base() const
{
  return zero_based;
}
// Current value of max_in_row.
int row_max() const
{
  return max_in_row;
}
// Current value of storage_format.
int format() const
{
  return storage_format;
}
private:
bool compressed;
int nr,nc;
std::vector<T> vals;
std::vector<int> colms,myrows,rowIndex;
bool zero_based;
int storage_format; // 0: CSR, 1: Compressed Matrix (ESSL)
int max_in_row;
_mySort_snD_ my_sort;
/*
struct __mySort_snD__ {
bool operator() (const s1D<RealType>& lhs, const s1D<RealType>& rhs)
{ return (bool)(std::get<0>(lhs) < std::get<0>(rhs));
}
bool operator() (const s2D<RealType>& lhs, const s2D<RealType>& rhs)
{ return (bool)(std::get<0>(lhs) < std::get<0>(rhs)) ||
( !(bool)(std::get<0>(rhs) < std::get<0>(lhs)) &&
(bool)(std::get<1>(lhs) < std::get<1>(rhs)) );
}
bool operator() (const s1D<std::complex<RealType> >& lhs, const s1D<std::complex<RealType> >& rhs)
{ return (bool)(std::get<0>(lhs) < std::get<0>(rhs));
}
bool operator() (const s2D<std::complex<RealType> >& lhs, const s2D<std::complex<RealType> >& rhs)
{ return (bool)(std::get<0>(lhs) < std::get<0>(rhs)) ||
( !(bool)(std::get<0>(rhs) < std::get<0>(lhs)) &&
(bool)(std::get<1>(lhs) < std::get<1>(rhs)) );
}
} my_sort;
*/
};
}
#endif
#endif

440
src/AFQMC/Matrix/dv.h Executable file
View File

@ -0,0 +1,440 @@
#ifndef QMCPLUSPLUS_AFQMC_SMDENSEVECTOR_H
#define QMCPLUSPLUS_AFQMC_SMDENSEVECTOR_H
#include<iostream>
#include<vector>
#include<tuple>
#include <cassert>
#include<algorithm>
#include<complex>
#include"../config.0.h"
//#include"AFQMC/config.0.h"
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/vector.hpp>
#define ASSERT_VECTOR
namespace qmcplusplus
{
// wrapper for boost::interprocess::vector
template<class T>
class SMDenseVector
{
public:
template<typename spT> using ShmemAllocator = boost::interprocess::allocator<spT, boost::interprocess::managed_shared_memory::segment_manager>;
template<typename spT> using boost_SMVector = boost::interprocess::vector<spT, ShmemAllocator<spT>>;
typedef T Type_t;
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef const int* const_indxPtr;
typedef int* indxPtr;
typedef typename boost_SMVector<T>::iterator iterator;
typedef typename boost_SMVector<T>::const_iterator const_iterator;
typedef typename boost_SMVector<int>::iterator int_iterator;
typedef typename boost_SMVector<int>::const_iterator const_int_iterator;
typedef boost_SMVector<T> This_t;
// Default constructor: nothing allocated, not the head, all pointers null.
// Initialisers are listed in member declaration order (the order in which they run).
// `comm` is not initialised here; setup() assigns it.
SMDenseVector<T>()
  : mutex(NULL),
    vals(NULL),
    share_buff(NULL),
    head(false),
    ID(""),
    SMallocated(false),
    npig(0),
    segment(NULL),
    alloc_T(NULL),
    alloc_mutex(NULL),
    alloc_uchar(NULL)
{
  remover.ID="NULL";
  remover.head=false;
}
// Destructor: releases the allocator objects created with new in allocate(), then,
// if a segment is open, closes it and removes the shared-memory object named ID.
~SMDenseVector<T>()
{
  // The allocators only store a handle to the segment manager; deleting them does not
  // touch the segment. (delete on a null pointer is a no-op.)
  delete alloc_T;
  delete alloc_mutex;
  delete alloc_uchar;
  if(segment!=NULL) {
    delete segment;
    boost::interprocess::shared_memory_object::remove(ID.c_str());
  }
}
// this should probably be disallowed
SMDenseVector(const SMDenseVector<T> &rhs)
{
// ID = rhs.ID; // is this a good idea???
// head = rhs.head;
APP_ABORT(" Error: SMDenseVector(SMDenseVector rhs) copy constructor has been disabled.");
}
// Record the configuration of this vector:
//   hd     : stored in `head` and in the remover helper
//   ii     : shared-memory name, stored in `ID` and in the remover helper
//   _npig  : stored in `npig` (barrier() is skipped when it equals 1)
//   comm_  : communicator used by barrier()
inline void setup(bool hd, std::string ii, int _npig, MPI_Comm comm_)
{
  comm=comm_;
  npig=_npig;
  head=hd;
  remover.head=hd;
  ID=ii;
  remover.ID=ii;
}
// Make sure the vector can hold nnz elements. (Re)allocates when nothing is allocated
// yet, when the capacity is too small, or when it is larger than nnz and allow_reduce
// is set. Always ends with a barrier().
inline void reserve(int nnz, bool allow_reduce = false)
{
  bool must_allocate = (vals==NULL);
  if(!must_allocate)
    must_allocate = (vals->capacity() < nnz) || (vals->capacity() > nnz && allow_reduce);
  if(must_allocate)
  {
    allocate(nnz,allow_reduce);
    barrier();
    initializeChildren();
    barrier();
  }
  barrier();
}
// Pass n objects of type T1 between processes through the shared byte buffer:
// the sender copies x into the buffer, everyone else copies the buffer into x.
// The barriers order the write before the reads. Aborts when nothing is allocated.
template<typename T1>
inline void share(T1* x, int n, bool sender)
{
  if(!SMallocated)
    APP_ABORT("Error: Call to SMDenseVector::share with unallocated object. \n");
  assert( sizeof(T1)*n < sizeof(unsigned char)*share_buff->size() );
  if(!sender)
  {
    barrier();
    std::memcpy(x,&((*share_buff)[0]),sizeof(T1)*n);
  }
  else
  {
    std::memcpy(&((*share_buff)[0]),x,sizeof(T1)*n);
    barrier();
  }
  barrier();
}
// Pass the first n elements of a std::vector between processes through the shared
// byte buffer: the sender copies them into the buffer, everyone else copies the buffer
// into x. x must already hold at least n elements. Aborts when nothing is allocated.
template<typename T1>
inline void share(std::vector<T1>& x, int n, bool sender)
{
  if(!SMallocated)
    APP_ABORT("Error: Call to SMDenseVector::share with unallocated object. \n");
  assert( sizeof(T1)*n < sizeof(unsigned char)*share_buff->size() );
  assert( x.size() >= n);
  if(!sender)
  {
    barrier();
    std::memcpy(x.data(),&((*share_buff)[0]),sizeof(T1)*n);
  }
  else
  {
    std::memcpy(&((*share_buff)[0]),x.data(),sizeof(T1)*n);
    barrier();
  }
  barrier();
}
// Synchronise through MPI_Barrier on `comm`; skipped entirely when npig is 1.
inline void barrier()
{
  if(npig!=1)
    MPI_Barrier(comm);
}
inline bool deallocate()
{
SMallocated = false;
barrier();
if(!head) {
try{
delete segment;
segment=NULL;
} catch(std::bad_alloc&) {
std::cerr<<"Problems deleting segment in SMDenseVector::deallocate()." <<std::endl;
return false;
}
}
barrier();
if(head) {
try{
delete segment;
segment=NULL;
boost::interprocess::shared_memory_object::remove(ID.c_str());
} catch(std::bad_alloc&) {
std::cerr<<"Problems de-allocating shared memory in SMDenseVector." <<std::endl;
return false;
}
}
barrier();
}
// Create or resize the shared-memory segment so that `vals` can hold n elements.
//   n            : requested capacity
//   allow_reduce : when false, an existing allocation with capacity >= n is kept as is
// Only the head process creates/resizes the segment; a non-head process that was already
// attached just closes its mapping here. Returns true on success, false on failure.
// NOTE(review): the `return false` paths leave before the final barrier(); if other
// processes are waiting in that barrier they may block - confirm with the callers.
// NOTE(review): several handlers catch std::bad_alloc only; confirm which exception types
// the boost::interprocess calls inside those try blocks can actually throw.
inline bool allocate(int n, bool allow_reduce=false)
{
bool grow = false;
// size of the currently open segment (0 when none is open)
uint64_t old_sz = (segment==NULL)?0:(segment->get_size());
if(SMallocated) {
// already large enough and shrinking not requested: nothing to do
if(vals!=NULL && vals->capacity() >= n && !allow_reduce) return true;
grow = true;
if(!head) { // delay delete call on head in case you need to shrink vector
delete segment;
segment=NULL;
}
}
barrier();
if(head) {
// bytes requested: mutex + n values + 1000-byte share buffer + 8000 extra bytes
uint64_t memory = sizeof(boost::interprocess::interprocess_mutex)+n*sizeof(T)+1000*sizeof(unsigned char)+8000;
if(grow) {
if(memory > old_sz) {
// enlarge the existing segment by the missing amount; it must be closed first
uint64_t extra = memory - old_sz;
delete segment;
segment=NULL;
if(!boost::interprocess::managed_shared_memory::grow(ID.c_str(), extra)) {
std::cerr<<" Error growing shared memory in SMDenseVector::allocate(). \n";
return false;
}
} else {
// shrink: recreate "vals" empty with the smaller capacity, then trim the segment
segment->destroy<boost_SMVector<T>>("vals");
vals = segment->construct<boost_SMVector<T>>("vals")(*alloc_T);
vals->reserve(n);
delete segment;
segment=NULL;
if(!boost::interprocess::managed_shared_memory::shrink_to_fit(ID.c_str())) {
std::cerr<<" Error in shrink_to_fit shared memory in SMDenseVector::allocate(). \n";
return false;
}
}
// reopen the resized segment and look the named objects up again
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::open_only, ID.c_str());
vals = segment->find<boost_SMVector<T>>("vals").first;
share_buff = segment->find<boost_SMVector<unsigned char>>("share_buff").first;
mutex = segment->find<boost::interprocess::interprocess_mutex>("mutex").first;
assert(vals != 0);
assert(share_buff != 0);
assert(mutex != 0);
vals->reserve(n);
} catch(std::bad_alloc&) {
std::cerr<<"Problems opening shared memory in SMDenseVector::allocate() ." <<std::endl;
return false;
}
} else {
// first allocation: create the segment; if creation throws, remove the
// shared-memory object with this name and try once more
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
} catch(boost::interprocess::interprocess_exception &ex) {
std::cout<<" Found managed_shared_memory segment, removing. Careful with persistent SHM segment. \n";
boost::interprocess::shared_memory_object::remove(ID.c_str());
segment=NULL;
}
if(segment==NULL) {
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
} catch(boost::interprocess::interprocess_exception &ex) {
std::cerr<<"Problems setting up managed_shared_memory in SMSparseMatrix." <<std::endl;
return false;
}
}
// construct the named objects: 1000-byte share buffer, mutex and the value vector
try {
alloc_T = new ShmemAllocator<T>(segment->get_segment_manager());
alloc_uchar = new ShmemAllocator<unsigned char>(segment->get_segment_manager());
share_buff = segment->construct<boost_SMVector<unsigned char>>("share_buff")(*alloc_uchar);
share_buff->resize(1000);
mutex = segment->construct<boost::interprocess::interprocess_mutex>("mutex")();
vals = segment->construct<boost_SMVector<T>>("vals")(*alloc_T);
vals->reserve(n);
} catch(std::bad_alloc&) {
std::cerr<<"Problems allocating shared memory in SMDenseVector." <<std::endl;
return false;
}
}
}
barrier();
SMallocated=true;
return true;
}
// only call this when all arrays have been allocated and modified
inline bool initializeChildren()
{
if(head) return true;
try {
segment = new boost::interprocess::managed_shared_memory(boost::interprocess::open_only, ID.c_str());
vals = segment->find<boost_SMVector<T>>("vals").first;
share_buff = segment->find<boost_SMVector<unsigned char>>("share_buff").first;
mutex = segment->find<boost::interprocess::interprocess_mutex>("mutex").first;
assert(vals != 0);
assert(share_buff != 0);
assert(mutex != 0);
} catch(std::bad_alloc&) {
std::cerr<<"Problems allocating shared memory in SMDenseVector: initializeChildren() ." <<std::endl;
return false;
}
return true;
}
// resize is probably the best way to setup the vector
// Resize the vector to nnz elements, (re)allocating the shared segment when needed.
//   nnz          : new number of elements
//   allow_reduce : when true and the capacity exceeds nnz, the segment is shrunk and
//                  the first nnz values are preserved through a temporary copy
// The final element count is set by the head only; the routine ends with a barrier().
inline void resize(int nnz, bool allow_reduce=false)
{
if(vals==NULL || (vals!=NULL && vals->capacity() < nnz) ) {
// nothing allocated yet, or too small: allocate and let the other processes re-attach
allocate(nnz,allow_reduce);
barrier();
initializeChildren();
barrier();
} else if(vals!=NULL && vals->capacity() > nnz && allow_reduce) {
// shrink path: the head saves the first nnz values before the vector is recreated
// NOTE(review): the copy reads vals->begin()+nnz elements; this presumes the current
// size is at least nnz - confirm.
std::vector<T> tmp;
if(head) {
tmp.resize(nnz);
std::copy(vals->begin(),vals->begin()+nnz,tmp.begin());
}
allocate(nnz,allow_reduce);
if(head) {
vals->resize(nnz);
std::copy(tmp.begin(),tmp.begin()+nnz,vals->begin());
}
barrier();
initializeChildren();
barrier();
}
if(head) vals->resize(nnz);
barrier();
}
// Remove all elements. Only acts when `head` is set and the segment is allocated.
inline void clear()
{
  if(head && SMallocated)
    vals->clear();
}
// Number of elements, or 0 when nothing is allocated.
inline int size() const
{
  if(vals==NULL) return 0;
  return vals->size();
}
// Pointer to the first element, or NULL when nothing is allocated.
inline const_pointer values() const
{
  if(vals==NULL) return NULL;
  return &((*vals)[0]);
}
inline pointer values()
{
  if(vals==NULL) return NULL;
  return &((*vals)[0]);
}
// Assignment is disabled: the call only reports an error through APP_ABORT.
// NOTE(review): there is no return statement after APP_ABORT, and the declared return
// type This_t is the underlying boost vector type rather than SMDenseVector<T> - confirm
// both are intended.
inline This_t& operator=(const SMDenseVector<T> &rhs)
{
APP_ABORT(" Error: SMDenseVector(SMDenseVector rhs) operator= has been disabled.");
// previous implementation, kept for reference:
//resize(rhs.size());
//if(!head) return *this;
//(*vals)=*(rhs.vals);
//return *this;
}
// Element access by position (both operator() and operator[] behave the same).
// With ASSERT_VECTOR defined the vector must be allocated and i must be in range.
// The check is tied to ASSERT_VECTOR, the macro this header defines; it was previously
// tied to ASSERT_SPARSEMATRIX, which this header never defines.
inline Type_t& operator()(unsigned int i)
{
#ifdef ASSERT_VECTOR
  assert(vals!=NULL && i<vals->size());
#endif
  return (*vals)[i];
}
inline Type_t& operator[](unsigned int i)
{
#ifdef ASSERT_VECTOR
  assert(vals!=NULL && i<vals->size());
#endif
  return (*vals)[i];
}
// Store v at position i.
//   needs_locks = true  : the write is done by the calling process under the shared mutex
//   needs_locks = false : only a process with `head` set writes; others return
// With ASSERT_VECTOR defined the vector must be allocated and i must be in range.
// The check is tied to ASSERT_VECTOR, the macro this header defines; it was previously
// tied to ASSERT_SPARSEMATRIX, which this header never defines.
inline void add(const int i, const T& v, bool needs_locks=false)
{
#ifdef ASSERT_VECTOR
  assert(vals!=NULL && i>=0 && i<static_cast<int>(vals->size()));
#endif
  if(needs_locks) {
    boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*mutex);
    (*vals)[i]=v;
  } else {
    if(!head) return;
    (*vals)[i]=v;
  }
}
// Current capacity of the underlying vector, or 0 when nothing is allocated.
inline int capacity()
{
  if(vals==NULL) return 0;
  return vals->capacity();
}
// Append v to the vector. Does nothing when nothing is allocated.
//   needs_locks = true  : the append is done by the calling process under the shared mutex
//   needs_locks = false : only a process with `head` set appends; others return
// When the capacity is nearly exhausted, allocate() is called first with room for
// 1000 more elements than the current size.
// NOTE(review): allocate() calls barrier(), and on a non-head process it closes the
// segment without re-attaching; confirm this path is only reached in a context where
// that is safe.
inline void push_back(const T& v, bool needs_locks=false)
{
if(vals==NULL) return;
if(vals->capacity() <= vals->size()+1) allocate(vals->size()+1000);
if(needs_locks) {
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*mutex);
vals->push_back(v);
} else {
if(!head) return;
vals->push_back(v);
}
}
// Scale every element by a real factor. Only a process with `head` set modifies the data.
inline SMDenseVector<T>& operator*=(const RealType rhs )
{
  if(head) {
    for(T& entry : *vals)
      entry *= rhs;
  }
  return *this;
}
// Scale every element by a complex factor. Only a process with `head` set modifies the data.
inline SMDenseVector<T>& operator*=(const std::complex<RealType> rhs )
{
  if(head) {
    for(T& entry : *vals)
      entry *= rhs;
  }
  return *this;
}
friend std::ostream& operator<<(std::ostream& out, const SMDenseVector<T>& rhs)
{
for(int i=0; i<rhs.vals->size(); i++)
out<<"(" <<(*(rhs.myrows))[i] <<"," <<(*(rhs.colms))[i] <<":" <<(*(rhs.vals))[i] <<")\n";
return out;
}
// this is ugly, but I need to code quickly
// so I'm doing this to avoid adding hdf5 support here
// Pointer to the underlying shared-memory vector (may be NULL).
inline boost_SMVector<T>* getVector() const
{
  return vals;
}
// Iterator access, forwarded to the underlying vector.
inline iterator begin()
{
  return vals->begin();
}
inline const_iterator begin() const
{
  return vals->begin();
}
inline const_iterator end() const
{
  return vals->end();
}
inline iterator end()
{
  return vals->end();
}
// Pointer to the mutex stored in the shared segment (may be NULL).
boost::interprocess::interprocess_mutex* getMutex()
{
  return mutex;
}
private:
boost::interprocess::interprocess_mutex *mutex;
boost_SMVector<T> *vals;
boost_SMVector<unsigned char> *share_buff;
bool head;
std::string ID;
bool SMallocated;
int npig;
boost::interprocess::managed_shared_memory *segment;
ShmemAllocator<T> *alloc_T;
ShmemAllocator<boost::interprocess::interprocess_mutex> *alloc_mutex;
ShmemAllocator<unsigned char> *alloc_uchar;
// using MPI for barrier calls until I find solution
MPI_Comm comm;
// Helper member whose destructor removes the shared-memory object named ID when `head`
// is set. Both fields are filled in by the owning object after construction.
struct shm_remove
{
  bool head;
  std::string ID;
  // Start from a defined state. The original constructor tested `head` before any
  // value had been assigned to it, and ID was still empty at that point.
  shm_remove():head(false),ID() {
  }
  ~shm_remove(){
    if(head) boost::interprocess::shared_memory_object::remove(ID.c_str());
  }
} remover;
};
}
#endif

View File

@ -0,0 +1,370 @@
#ifndef QMCPLUSPLUS_AFQMC_SMDENSEVECTOR_H
#define QMCPLUSPLUS_AFQMC_SMDENSEVECTOR_H
#include<iostream>
#include<vector>
#include<tuple>
#include<assert.h>
#include<algorithm>
#include<complex>
#include"../config.0.h"
//#include"AFQMC/config.0.h"
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/vector.hpp>
//#include <boost/interprocess/sync/interprocess_barrier.hpp>
#define ASSERT_VECTOR
namespace qmcplusplus
{
// wrapper for boost::interprocess::vector
template<class T>
class SMDenseVector
{
public:
template<typename spT> using ShmemAllocator = boost::interprocess::allocator<spT, boost::interprocess::managed_shared_memory::segment_manager>;
template<typename spT> using boost_SMVector = boost::interprocess::vector<spT, ShmemAllocator<spT>>;
typedef T Type_t;
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef const int* const_indxPtr;
typedef int* indxPtr;
typedef typename boost_SMVector<T>::iterator iterator;
typedef typename boost_SMVector<T>::const_iterator const_iterator;
typedef typename boost_SMVector<int>::iterator int_iterator;
typedef typename boost_SMVector<int>::const_iterator const_int_iterator;
typedef boost_SMVector<T> This_t;
// Default constructor: not the head, nothing allocated, data pointers null.
SMDenseVector<T>()
  : head(false),
    ID(""),
    SMallocated(false),
    vals(NULL),
    share_buff(NULL),
    mutex(NULL),
    npig(0)
{
  remover.head=false;
  remover.ID="NULL";
}
// Destructor: when this object has `head` set and the segment is allocated, destroy the
// named objects, close the segment and remove the shared-memory object named ID.
~SMDenseVector<T>()
{
  if(head && SMallocated) {
    segment->destroy<boost_SMVector<T>>("vals");
    segment->destroy<boost_SMVector<unsigned char>>("share_buff");
    // The third named object created by allocate() is the mutex; the original call
    // asked for a vector with an empty name, which matches nothing.
    segment->destroy<boost::interprocess::interprocess_mutex>("mutex");
    delete segment;
    boost::interprocess::shared_memory_object::remove(ID.c_str());
  }
}
// this should probably be disallowed
SMDenseVector(const SMDenseVector<T> &rhs)
{
ID = rhs.ID; // is this a good idea???
head = rhs.head;
return;
}
// Record the configuration of this vector:
//   hd     : stored in `head` and in the remover helper
//   ii     : shared-memory name, stored in `ID` and in the remover helper
//   _npig  : stored in `npig` (barrier() is skipped when it equals 1)
//   comm_  : communicator used by barrier()
inline void setup(bool hd, std::string ii, int _npig, MPI_Comm comm_)
{
  comm=comm_;
  npig=_npig;
  head=hd;
  remover.head=hd;
  ID=ii;
  remover.ID=ii;
}
// Make sure the vector can hold n elements: (re)allocates when nothing is allocated yet
// or when the capacity is too small. Always ends with a barrier().
inline void reserve(int n)
{
  const bool must_allocate = (vals==NULL) || (vals->capacity() < n);
  if(must_allocate)
  {
    allocate(n);
    barrier();
    initializeChildren();
    barrier();
  }
  barrier();
}
// Pass one object of type T1 between processes through the shared byte buffer:
// the sender copies x into the buffer, everyone else copies the buffer into x.
// The barriers order the write before the reads.
template<typename T1>
inline void share(T1& x, bool sender) {
  // The buffer has a fixed size; refuse objects that do not fit instead of writing
  // past its end.
  assert( share_buff!=NULL && sizeof(T1) <= sizeof(unsigned char)*share_buff->size() );
  if(sender) {
    std::memcpy(&((*share_buff)[0]),&x,sizeof(T1));
    barrier();
  } else {
    barrier();
    std::memcpy(&x,&((*share_buff)[0]),sizeof(T1));
  }
  barrier();
}
// Synchronise through MPI_Barrier on `comm`; skipped entirely when npig is 1.
inline void barrier()
{
  if(npig!=1)
    MPI_Barrier(comm);
}
// Mark the vector as unallocated; when `head` is set, also remove the shared-memory
// object named ID. Returns true on success, false when a std::bad_alloc is caught.
// NOTE(review): the segment object is only forgotten (segment=NULL), not deleted; the
// delete call was already commented out in the original and is left that way here.
inline bool deallocate()
{
  SMallocated = false;
  if(head) {
    try{
      std::cout<<"in deallocate() " <<std::endl;
      segment=NULL;
      std::cout<<"in deallocate(): calling remove " <<std::endl;
      boost::interprocess::shared_memory_object::remove(ID.c_str());
      std::cout<<"in deallocate(): done calling remove " <<std::endl;
    } catch(std::bad_alloc&) {
      std::cerr<<"Problems de-allocating shared memory in SMDenseVector." <<std::endl;
      return false;
    }
  }
  std::cout<<"done in deallocate() " <<std::endl;
  return true;   // the original fell off the end of a bool function
}
// done like this to avoid dependencies on MPI
// Barrier called outside this routine
// Create the shared-memory segment and the named objects ("share_buff", "mutex", "vals")
// with room for n values. A process without `head` only flags itself as allocated.
// Returns true on success, false when the segment or its objects cannot be created.
inline bool allocate(int n)
{
  if(!head) {
    SMallocated=true;
    return true;
  }
  if(SMallocated) {
    // already large enough: keep the current allocation
    if(vals!=NULL && vals->capacity() >= n) return true;
    deallocate();
  }
  // bytes requested: mutex + n values + 1000-byte share buffer + 8000 extra bytes
  uint64_t memory = sizeof(boost::interprocess::interprocess_mutex)+n*sizeof(T)+1000*sizeof(unsigned char)+8000;
  try {
    segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
  } catch(boost::interprocess::interprocess_exception &ex) {
    // Creation failed (typically a segment with this name is left over): remove it by
    // name and retry below. The original also opened the old segment here; that mapping
    // was never released, and the open itself could throw from inside this handler.
    std::cout<<" Found managed_shared_memory segment, removing. \n";
    boost::interprocess::shared_memory_object::remove(ID.c_str());
    segment=NULL;
  }
  if(segment==NULL) {
    try {
      segment = new boost::interprocess::managed_shared_memory(boost::interprocess::create_only, ID.c_str(), memory);
    } catch(boost::interprocess::interprocess_exception &ex) {
      std::cerr<<"Problems setting up managed_shared_memory in SMSparseMatrix." <<std::endl;
      return false;
    }
  }
  try {
    alloc_T = new ShmemAllocator<T>(segment->get_segment_manager());
    alloc_uchar = new ShmemAllocator<unsigned char>(segment->get_segment_manager());
    share_buff = segment->construct<boost_SMVector<unsigned char>>("share_buff")(*alloc_uchar);
    share_buff->resize(1000);
    mutex = segment->construct<boost::interprocess::interprocess_mutex>("mutex")();
    vals = segment->construct<boost_SMVector<T>>("vals")(*alloc_T);
    vals->reserve(n);
  } catch(std::bad_alloc&) {
    std::cerr<<"Problems allocating shared memory in SMDenseVector." <<std::endl;
    return false;
  }
  SMallocated=true;
  return true;
}
// only call this when all arrays have been allocated and modified
//
// Attaches a non-head process to the segment created by the head: opens the
// segment by name and looks up "vals", "share_buff" and "mutex".
// The head returns true immediately. Returns false if std::bad_alloc is caught.
inline bool initializeChildren()
{
  if(!head) {
    try {
      segment = new boost::interprocess::managed_shared_memory(boost::interprocess::open_only, ID.c_str());
      vals = segment->find<boost_SMVector<T>>("vals").first;
      share_buff = segment->find<boost_SMVector<unsigned char>>("share_buff").first;
      mutex = segment->find<boost::interprocess::interprocess_mutex>("mutex").first;
    } catch(std::bad_alloc&) {
      std::cerr<<"Problems allocating shared memory in SMDenseVector: initializeChildren() ." <<std::endl;
      return false;
    }
  }
  return true;
}
// resize is probably the best way to setup the vector
//
// Grows the shared storage if needed and sets the vector length to nnz.
// With wait==true the (re)allocation is followed by barrier /
// initializeChildren / barrier, and a closing barrier is executed at the end.
// Only the head process changes the length of the vector.
inline void resize(int nnz, bool wait=true)
{
  const bool must_grow = (vals==NULL) || (vals->capacity() < nnz);
  if(must_grow) {
    allocate(nnz);
    if(wait) {
      barrier();
      initializeChildren();
      barrier();
    }
  }
  if(head) vals->resize(nnz);
  if(wait) barrier();
}
// Empties the vector. Acts only on the head process and only while the
// storage is flagged as allocated.
inline void clear() {
  if(head && SMallocated)
    vals->clear();
}
// Number of elements currently stored; 0 when no vector is attached.
inline int size() const
{
  if(vals==NULL) return 0;
  return vals->size();
}
// Read-only pointer to the first element; NULL when no vector is attached.
inline const_pointer values() const
{
  if(vals==NULL) return NULL;
  return &((*vals)[0]);
}
// Writable pointer to the first element; NULL when no vector is attached.
inline pointer values()
{
  if(vals==NULL) return NULL;
  return &((*vals)[0]);
}
// Copy assignment. Resizes this vector to rhs.size() (all processes take part,
// because resize() runs barriers) and then lets the head copy the contents.
// Always returns *this; the original had a value-less `return;` and no return
// on the head path. Nothing is copied when rhs has no vector attached.
inline This_t& operator=(const SMDenseVector<T> &rhs)
{
  resize(rhs.size());
  if(head && rhs.vals!=NULL)
    (*vals)=*(rhs.vals);
  return *this;
}
// Unchecked element access (bounds asserted only with ASSERT_SPARSEMATRIX).
inline Type_t& operator()(unsigned int i)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<vals->size());
#endif
  boost_SMVector<T>& data = *vals;
  return data[i];
}
// Unchecked element access (bounds asserted only with ASSERT_SPARSEMATRIX).
inline Type_t& operator[](unsigned int i)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<vals->size());
#endif
  boost_SMVector<T>& data = *vals;
  return data[i];
}
// Stores v at position i.
// With needs_locks==true any process may write, serialized by the shared
// mutex; otherwise only the head process writes and the others return.
// NOTE(review): despite the name, the element is overwritten, not accumulated.
inline void add(const int i, const T& v, bool needs_locks=false)
{
#ifdef ASSERT_SPARSEMATRIX
  assert(i>=0 && i<vals->size());
#endif
  if(!needs_locks) {
    if(head) (*vals)[i]=v;
    return;
  }
  boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*mutex);
  (*vals)[i]=v;
}
// Capacity of the attached vector; 0 when no vector is attached.
inline int capacity()
{
  if(vals==NULL) return 0;
  return vals->capacity();
}
// Appends v to the shared vector. Does nothing when no vector is attached.
// With needs_locks==true any process may append under the shared mutex;
// otherwise only the head process appends.
inline void push_back(const T& v, bool needs_locks=false)
{
if(vals==NULL) return;
// NOTE(review): on the head, allocate() with a larger size calls deallocate()
// and constructs a fresh "vals" object, so elements stored so far appear to be
// dropped here, and non-head processes are not re-attached -- confirm.
// The growth step also runs before the mutex is taken.
if(vals->capacity() <= vals->size()+1) allocate(vals->size()+1000);
if(needs_locks) {
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> lock(*mutex);
vals->push_back(v);
} else {
if(!head) return;
vals->push_back(v);
}
}
// Scales every element by a real factor. Only the head process modifies the
// data; all processes get *this back.
inline SMDenseVector<T>& operator*=(const RealType rhs )
{
  if(head) {
    iterator cur = vals->begin();
    while(cur != vals->end()) {
      (*cur) *= rhs;
      cur++;
    }
  }
  return *this;
}
// Scales every element by a complex factor. Only the head process modifies
// the data; all processes get *this back.
inline SMDenseVector<T>& operator*=(const std::complex<RealType> rhs )
{
  if(head) {
    iterator cur = vals->begin();
    while(cur != vals->end()) {
      (*cur) *= rhs;
      cur++;
    }
  }
  return *this;
}
friend std::ostream& operator<<(std::ostream& out, const SMDenseVector<T>& rhs)
{
for(int i=0; i<rhs.vals->size(); i++)
out<<"(" <<(*(rhs.myrows))[i] <<"," <<(*(rhs.colms))[i] <<":" <<(*(rhs.vals))[i] <<")\n";
return out;
}
// this is ugly, but I need to code quickly
// so I'm doing this to avoid adding hdf5 support here
// Direct access to the underlying shared vector (may be NULL).
inline boost_SMVector<T>* getVector() const
{
  return vals;
}
// Iterator access; these dereference vals without a NULL check.
inline iterator begin()
{
  return vals->begin();
}
inline iterator end()
{
  return vals->end();
}
inline const_iterator begin() const
{
  return vals->begin();
}
inline const_iterator end() const
{
  return vals->end();
}
// Pointer to the mutex stored in the shared segment.
boost::interprocess::interprocess_mutex* getMutex() { return mutex; }
private:
// Mutex object named "mutex" inside the segment; used by add()/push_back().
boost::interprocess::interprocess_mutex *mutex;
// boost::interprocess::barrier *mybarrier;
// Element storage, the object named "vals" inside the segment.
boost_SMVector<T> *vals;
// Byte buffer named "share_buff" inside the segment; used by share().
boost_SMVector<unsigned char> *share_buff;
// True on the process that creates and removes the segment.
bool head;
// Name of the shared memory object.
std::string ID;
// Set by allocate(), cleared by deallocate().
bool SMallocated;
// Number of processes in the group; barrier() is skipped when it is 1.
int npig;
// Handle to the managed shared memory segment.
boost::interprocess::managed_shared_memory *segment;
// Allocators bound to the segment manager, created in allocate().
ShmemAllocator<T> *alloc_T;
// NOTE(review): not referenced by any method visible here.
ShmemAllocator<boost::interprocess::interprocess_mutex> *alloc_mutex;
ShmemAllocator<unsigned char> *alloc_uchar;
// using MPI for barrier calls until I find solution
MPI_Comm comm;
// Helper member that removes the named shared memory object when the owning
// vector is destroyed, but only if `head` was set (see setup()).
// The constructor now just initializes `head`: the original read `head`
// before anything had assigned it, and at that point ID is always empty.
struct shm_remove
{
  bool head;
  std::string ID;
  shm_remove() : head(false) {
  }
  ~shm_remove(){
    if(head) boost::interprocess::shared_memory_object::remove(ID.c_str());
  }
} remover;
};
}
#endif

BIN
src/AFQMC/Matrix/test Executable file

Binary file not shown.

63
src/AFQMC/Matrix/test.cpp Normal file
View File

@ -0,0 +1,63 @@
#include<iostream>
#include<cstdlib>
#include <mpi.h>
using namespace std;
#include "sys/sysinfo.h"
// Free plus buffer RAM in MiB as reported by sysinfo(2).
// Returns 0 when the sysinfo call fails. The counters are expressed in units
// of si.mem_unit bytes, so they are scaled before converting to MiB (the
// original assumed a unit of one byte).
inline size_t freemem()
{
  struct sysinfo si;
  if(sysinfo(&si) != 0) return 0;
  const unsigned long long unit = (si.mem_unit != 0) ? si.mem_unit : 1;
  const unsigned long long units = static_cast<unsigned long long>(si.freeram) + si.bufferram;
  return static_cast<size_t>((units*unit)>>20);
}
#include "dv.h"
// Manual MPI smoke test for SMDenseVector<double>: rank 0 acts as head, the
// vector is resized several times, each rank writes a few entries and rank 0
// prints them together with the free memory reported by freemem().
int main(int argc, char* argv[])
{
int rank, nproc;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nproc);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
qmcplusplus::SMDenseVector<double> buff;
if(rank==0) std::cout<<"Memory: " <<freemem() <<std::endl;
// Rank 0 is the head; every rank uses the same segment name.
buff.setup(rank==0,std::string("Buffer_0"),nproc,MPI_COMM_WORLD);
buff.resize(1000000);
if(rank==0) std::cout<<"Memory: " <<freemem() <<std::endl;
std::cout<<"rank: " <<rank <<" " <<nproc <<" " <<buff.size() <<std::endl;
// Each rank writes its own rank number into 5 consecutive slots.
for(int i=0; i<5; i++)
*(buff.values()+rank*5+i) = rank;
// NOTE(review): there is no barrier between the writes above and this read.
if(rank==0)
for(int i=0; i<10; i++)
std::cout<<" " <<i <<" " <<*(buff.values()+i) <<std::endl;
// Grow the vector; this goes through a new allocation.
buff.resize(100000000);
if(rank==0) std::cout<<"Memory: " <<freemem() <<std::endl;
std::cout<<"rank: " <<rank <<" " <<nproc <<" " <<buff.size() <<std::endl;
for(int i=0; i<10; i++)
*(buff.values()+rank*10+i) = rank+10;
if(rank==0)
for(int i=0; i<20; i++)
std::cout<<" " <<i <<" " <<*(buff.values()+i) <<std::endl;
buff.resize(200000000);
if(rank==0) std::cout<<"Memory: " <<freemem() <<std::endl;
// Shrinking request: capacity is already sufficient, so only the length changes.
buff.resize(20,true);
if(rank==0) std::cout<<"Memory: " <<freemem() <<std::endl;
MPI_Finalize();
return 0;
}

View File

@ -0,0 +1,504 @@
#include<iterator>
#include"Configuration.h"
//#include <AFQMC/Matrix/DenseMatrix.h>
#include "AFQMC/config.h"
#include "Numerics/OhmmsBlas.h"
#include "Numerics/Blasf.h"
#include "AFQMC/Numerics/DenseMatrixOperations.h"
namespace qmcplusplus
{
namespace DenseMatrixOperators
{
// Full eigen-decomposition of a real symmetric N x N matrix using dsyevr.
//   A, LDA       input matrix and its leading dimension. A is not modified:
//                the solver works on a packed copy.
//   eigVal       receives the eigenvalues.
//   eigVec, LDV  receives the eigenvectors; the solver output is transposed
//                in place before returning.
// Returns false (after logging through app_error) when the solver reports a
// non-zero INFO, including during the workspace query, or when fewer than N
// eigenvalues were found.
bool symEigenSysAll(int N, double* A, int LDA, double* eigVal, double* eigVec, int LDV)
{
  // Packed copy with leading dimension N; the solver overwrites its input.
  std::vector<double> A0(N*N);
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) A0[i*N+j] = A[i*LDA+j];
  char JOBZ('V');
  char RANGE('A');
  char UPLO('U');
  double VL=0;
  double VU=0;
  int IL=0;
  int IU=0;
  double ABSTOL=1e-8;//DLAMCH( 'Safe minimum' );
  int M; // output: total number of eigenvalues found
  std::vector<int> ISUPPZ(2*N);
  std::vector<double> WORK(1); // set with workspace query
  int LWORK=-1;
  std::vector<int> IWORK(1);
  int LIWORK=-1;
  int INFO;
  // Workspace query: with LWORK = LIWORK = -1 only the sizes are returned.
  dsyevr(JOBZ, RANGE, UPLO, N, &(A0[0]), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with workspace query during eigenSysAll; INFO: " <<INFO <<std::endl;
    return false;
  }
  LWORK = int(WORK[0]);
  WORK.resize(LWORK);
  LIWORK = int(IWORK[0]);
  IWORK.resize(LIWORK);
  // remember that Z comes out transposed!!!
  dsyevr (JOBZ, RANGE, UPLO, N, A0.data(), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during eigenSysAll; INFO: " <<INFO <<std::endl;
    return false;
  }
  if(M != N) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during eigenSysAll. Found too few eigenvalues. M: " <<M <<std::endl;
    return false;
  }
  // transpose Z
  transpose<double>(N,eigVec,LDV);
  return true;
}
// Full eigen-decomposition of a complex Hermitian N x N matrix using zheevr.
//   A, LDA       input matrix and its leading dimension. A is not modified:
//                the solver works on a transposed, packed copy.
//   eigVal       receives the (real) eigenvalues.
//   eigVec, LDV  receives the eigenvectors; the solver output is transposed
//                in place before returning.
// Returns false (after logging through app_error) when the solver reports a
// non-zero INFO, including during the workspace query, or when fewer than N
// eigenvalues were found.
bool symEigenSysAll(int N, std::complex<double>* A, int LDA, double* eigVal, std::complex<double>* eigVec, int LDV)
{
  std::vector<std::complex<double> > A0(N*N);
  // transposing matrix since lapack expects fortran ordering
  // not a problem since routine destroys input matrix and we want to preserve it anyway
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) A0[i*N+j] = A[j*LDA+i];
  char JOBZ('V');
  char RANGE('A');
  char UPLO('U');
  double VL=0;
  double VU=0;
  int IL=0;
  int IU=0;
  double ABSTOL=1e-8;//DLAMCH( 'Safe minimum' );
  int M; // output: total number of eigenvalues found
  std::vector<int> ISUPPZ(2*N);
  std::vector<std::complex<double> > WORK(1); // set with workspace query
  int LWORK=-1;
  std::vector<double > RWORK(1); // set with workspace query
  int LRWORK=-1;
  std::vector<int> IWORK(1);
  int LIWORK=-1;
  int INFO;
  // Workspace query: with all work lengths set to -1 only the sizes are returned.
  zheevr (JOBZ, RANGE, UPLO, N, &(A0[0]), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(RWORK[0]), LRWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with workspace query during eigenSysAll; INFO: " <<INFO <<std::endl;
    return false;
  }
  LWORK = int(WORK[0].real());
  WORK.resize(LWORK);
  LRWORK = int(RWORK[0]);
  RWORK.resize(LRWORK);
  LIWORK = int(IWORK[0]);
  IWORK.resize(LIWORK);
  // remember that Z comes out transposed!!!
  zheevr (JOBZ, RANGE, UPLO, N, A0.data(), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(RWORK[0]), LRWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during eigenSysAll; INFO: " <<INFO <<std::endl;
    return false;
  }
  if(M != N) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during eigenSysAll. Found too few eigenvalues. M: " <<M <<std::endl;
    return false;
  }
  // transpose Z
  transpose<std::complex<double> >(N,eigVec,LDV);
  return true;
}
// Computes eigenvalues 1..neig (by index) of a complex Hermitian N x N matrix
// using zheevr, and optionally the matching eigenvectors.
//   A, LDA       input matrix and its leading dimension. A is not modified:
//                the solver works on a transposed, packed copy.
//   neig         number of eigenvalues requested (index range 1..neig).
//   eigVal       receives the eigenvalues.
//   getEigV      when true, eigenvectors are computed as well.
//   eigVec, LDV  eigenvector output as written by the solver.
//                NOTE(review): unlike symEigenSysAll, no transpose is applied.
// Returns false (after logging through app_error) when the solver reports a
// non-zero INFO, including during the workspace query, or when the number of
// eigenvalues found differs from neig.
bool symEigenSysSelect(int N, std::complex<double>* A, int LDA, int neig, double* eigVal, bool getEigV, std::complex<double>* eigVec, int LDV)
{
  std::vector<std::complex<double> > A0(N*N);
  // transposing matrix since lapack expects fortran ordering
  // not a problem since routine destroys input matrix and we want to preserve it anyway
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) A0[i*N+j] = A[j*LDA+i];
  char JOBZ('V');
  if(!getEigV) JOBZ = 'N';
  char RANGE('I');
  char UPLO('U');
  double VL=0;
  double VU=0;
  int IL=1;
  int IU=neig;
  double ABSTOL=1e-8;//DLAMCH( 'Safe minimum' );
  int M; // output: total number of eigenvalues found
  std::vector<int> ISUPPZ(2*N);
  std::vector<std::complex<double> > WORK(1); // set with workspace query
  int LWORK=-1;
  std::vector<double > RWORK(1); // set with workspace query
  int LRWORK=-1;
  std::vector<int> IWORK(1);
  int LIWORK=-1;
  int INFO;
  // Workspace query: with all work lengths set to -1 only the sizes are returned.
  zheevr (JOBZ, RANGE, UPLO, N, &(A0[0]), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(RWORK[0]), LRWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with workspace query during symEigenSysSelect; INFO: " <<INFO <<std::endl;
    return false;
  }
  LWORK = int(WORK[0].real());
  WORK.resize(LWORK);
  LRWORK = int(RWORK[0]);
  RWORK.resize(LRWORK);
  LIWORK = int(IWORK[0]);
  IWORK.resize(LIWORK);
  // remember that Z comes out transposed!!!
  zheevr (JOBZ, RANGE, UPLO, N, A0.data(), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(RWORK[0]), LRWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during symEigenSysSelect; INFO: " <<INFO <<std::endl;
    return false;
  }
  if(M != neig) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during symEigenSysSelect. Found too few eigenvalues. M: " <<M <<std::endl;
    return false;
  }
  return true;
}
// Computes eigenvalues 1..neig (by index) of a real symmetric N x N matrix
// using dsyevr, and optionally the matching eigenvectors.
//   A, LDA       input matrix and its leading dimension. A is not modified:
//                the solver works on a packed copy.
//   neig         number of eigenvalues requested (index range 1..neig).
//   eigVal       receives the eigenvalues.
//   getEigV      when true, eigenvectors are computed as well.
//   eigVec, LDV  eigenvector output as written by the solver.
//                NOTE(review): unlike symEigenSysAll, no transpose is applied.
// Returns false (after logging through app_error) when the solver reports a
// non-zero INFO, including during the workspace query, or when the number of
// eigenvalues found differs from neig.
bool symEigenSysSelect(int N, double* A, int LDA, int neig, double* eigVal, bool getEigV, double* eigVec, int LDV)
{
  std::vector<double> A0(N*N);
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) A0[i*N+j] = A[i*LDA+j];
  char JOBZ('V');
  if(!getEigV) JOBZ = 'N';
  char RANGE('I');
  char UPLO('U');
  double VL=0;
  double VU=0;
  int IL=1;
  int IU=neig;
  double ABSTOL=1e-8;//DLAMCH( 'Safe minimum' );
  int M; // output: total number of eigenvalues found
  std::vector<int> ISUPPZ(2*N);
  std::vector<double> WORK(1); // set with workspace query
  int LWORK=-1;
  std::vector<int> IWORK(1);
  int LIWORK=-1;
  int INFO;
  // Workspace query: with LWORK = LIWORK = -1 only the sizes are returned.
  dsyevr(JOBZ, RANGE, UPLO, N, &(A0[0]), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with workspace query during symEigenSysSelect; INFO: " <<INFO <<std::endl;
    return false;
  }
  LWORK = int(WORK[0]);
  WORK.resize(LWORK);
  LIWORK = int(IWORK[0]);
  IWORK.resize(LIWORK);
  // remember that Z comes out transposed!!!
  dsyevr (JOBZ, RANGE, UPLO, N, A0.data(), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(ISUPPZ[0]), &(WORK[0]), LWORK, &(IWORK[0]), LIWORK, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during symEigenSysSelect; INFO: " <<INFO <<std::endl;
    return false;
  }
  if(M != neig) {
    app_error()<<" Problems with eigenvalue/eigenvector calculation during symEigenSysSelect. Found too few eigenvalues. M: " <<M <<std::endl;
    return false;
  }
  return true;
}
// Solves the generalized Hermitian eigenproblem of type 1 (ITYPE = 1) for the
// pair (A, B) with zhegvx, requesting eigenvalues 1..neig by index.
//   A, LDA / B, LDB  input matrices and leading dimensions. Neither is
//                    modified: the solver works on transposed, packed copies.
//   eigVal           receives the eigenvalues.
//   getEigV          when true, eigenvectors are computed as well.
//   eigVec, LDV      eigenvector output as written by the solver.
//   IFAIL            passed straight through to zhegvx.
// Returns false (after logging through app_error) when the solver reports a
// non-zero INFO, including during the workspace query.
bool genHermitianEigenSysSelect(int N, std::complex<double>* A, int LDA, std::complex<double>* B, int LDB, int neig, double* eigVal, bool getEigV, std::complex<double>* eigVec, int LDV, int* IFAIL)
{
  std::vector<std::complex<double> > A0(N*N);
  std::vector<std::complex<double> > B0(N*N);
  // transposing matrix since lapack expects fortran ordering
  // not a problem since routine destroys input matrix and we want to preserve it anyway
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) A0[i*N+j] = A[j*LDA+i];
  for(int i=0; i<N; i++)
    for(int j=0; j<N; j++) B0[i*N+j] = B[j*LDB+i];
  int ITYPE = 1;
  char JOBZ('N');
  char RANGE('I');
  char UPLO('U');
  double VL=0;
  double VU=0;
  int IL=1;
  int IU=neig;
  double ABSTOL=1e-8;//DLAMCH( 'Safe minimum' );
  int M; // output: total number of eigenvalues found
  std::vector<std::complex<double> > WORK(1); // set with workspace query
  int LWORK=-1;
  std::vector<double > RWORK(7*N);
  std::vector<int> IWORK(5*N);
  int INFO;
  if(getEigV) JOBZ = 'V';
  // Workspace query: with LWORK = -1 only the size of WORK is returned.
  zhegvx (ITYPE, JOBZ, RANGE, UPLO, N, &(A0[0]), N, &(B0[0]), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(WORK[0]), LWORK, &(RWORK[0]), &(IWORK[0]), IFAIL, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with workspace query during genHermitianEigenSysSelect; INFO: " <<INFO <<std::endl;
    return false;
  }
  LWORK = int(WORK[0].real());
  WORK.resize(LWORK);
  // remember that Z comes out transposed!!!
  zhegvx (ITYPE, JOBZ, RANGE, UPLO, N, &(A0[0]), N, &(B0[0]), N, VL, VU, IL, IU, ABSTOL, M, eigVal, eigVec, LDV, &(WORK[0]), LWORK, &(RWORK[0]), &(IWORK[0]), IFAIL, INFO);
  if(INFO != 0) {
    app_error()<<" Problems with generalized eigenvalue/eigenvector calculation during genHermitianEigenSysSelect; INFO: " <<INFO <<std::endl;
    return false;
  }
  return true;
}
// Computes expA = exp(A) for a complex Hermitian N x N matrix through its
// eigen-decomposition A = Z*M*Z^*, i.e. exp(A) = Z*exp(M)*Z^*.
// Before exponentiating, A is rebuilt from the decomposition and compared with
// the input; the function returns false if the summed absolute difference
// exceeds 1e-8 or if the eigen solver fails. expA is used as scratch space.
bool exponentiateHermitianMatrix(int N, std::complex<double>* A, int LDA, std::complex<double>* expA, int LDEXPA)
{
  typedef std::complex<double> cplx;
  const cplx one=cplx(1.0,0.0);
  const cplx zero=cplx(0.0,0.0);
  std::vector<cplx> scratch(N*N);  // holds Z*diag
  std::vector<double> evals(N);    // computed eigenvalues in ascending order
  std::vector<cplx> evecs(N*N);    // computed eigenvectors
  if(!symEigenSysAll(N,A,LDA,evals.data(),evecs.data(),N)) {
    app_error()<<" Problems in call to eigSysAll in exponentiateHermitianMatrix. \n" <<std::endl;
    return false;
  }
  // pass 0 rebuilds A itself (always do this test), pass 1 builds exp(A)
  for(int pass=0; pass<2; ++pass) {
    // expA = diagonal matrix of the (exponentiated) eigenvalues
    for(int r=0; r<N; ++r)
      for(int c=0; c<N; ++c)
        expA[r*LDEXPA+c] = cplx(0.0);
    for(int r=0; r<N; ++r) {
      if(pass==0)
        expA[r*LDEXPA+r] = evals[r];
      else
        expA[r*LDEXPA+r] = std::exp(evals[r]);
    }
    // scratch = V*diag
    product(N,N,N,evecs.data(),N,expA,LDEXPA,scratch.data(),N);
    // expA = scratch*V^* = V*diag*V^*
    BLAS::gemm('C','N', N, N, N,
               one, &(evecs[0]), N, scratch.data(), N,
               zero, expA, LDEXPA);
    if(pass==0) {
      // expA should be equal to A
      RealType s=0.0;
      for(int r=0; r<N; ++r)
        for(int c=0; c<N; ++c)
          s += std::abs(A[r*LDA+c]-expA[r*LDEXPA+c]);
      if( std::abs(s) > 1e-8) {
        std::cerr<<std::endl <<std::endl <<" Error in reconstruction of A: " <<s <<std::endl <<std::endl;
        return false;
      }
    }
  }
  return true;
}
// Computes expA = exp(A) for a real symmetric N x N matrix through its
// eigen-decomposition A = Z*M*Z', i.e. exp(A) = Z*exp(M)*Z'.
// Before exponentiating, A is rebuilt from the decomposition and compared with
// the input; the function returns false if the summed absolute difference
// exceeds 1e-8 or if the eigen solver fails. expA is used as scratch space.
bool exponentiateHermitianMatrix(int N, double* A, int LDA, double* expA, int LDEXPA)
{
  const double one=1.0;
  const double zero=0.0;
  std::vector<double> scratch(N*N);  // holds Z*diag
  std::vector<double> evals(N);      // computed eigenvalues in ascending order
  std::vector<double> evecs(N*N);    // computed eigenvectors
  if(!symEigenSysAll(N,A,LDA,evals.data(),evecs.data(),N)) {
    app_error()<<" Problems in call to eigSysAll in exponentiateHermitianMatrix. \n" <<std::endl;
    return false;
  }
  // pass 0 rebuilds A itself (always do this test), pass 1 builds exp(A)
  for(int pass=0; pass<2; ++pass) {
    // expA = diagonal matrix of the (exponentiated) eigenvalues
    for(int r=0; r<N; ++r)
      for(int c=0; c<N; ++c)
        expA[r*LDEXPA+c] = 0.0;
    for(int r=0; r<N; ++r) {
      if(pass==0)
        expA[r*LDEXPA+r] = evals[r];
      else
        expA[r*LDEXPA+r] = std::exp(evals[r]);
    }
    // scratch = V*diag
    product(N,N,N,evecs.data(),N,expA,LDEXPA,scratch.data(),N);
    // expA = scratch*V' = V*diag*V'
    BLAS::gemm('T','N', N, N, N,
               one, &(evecs[0]), N, scratch.data(), N,
               zero, expA, LDEXPA);
    if(pass==0) {
      // expA should be equal to A
      RealType s=0.0;
      for(int r=0; r<N; ++r)
        for(int c=0; c<N; ++c)
          s += std::abs(A[r*LDA+c]-expA[r*LDEXPA+c]);
      if( std::abs(s) > 1e-8) {
        std::cerr<<std::endl <<std::endl <<" Error in reconstruction of A: " <<s <<std::endl <<std::endl;
        return false;
      }
    }
  }
  return true;
}
/*
void product(const int M, const int N, const int K, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, std::complex<double>* C, const int LDC )
{
const char transa = 'N';
const char transb = 'N';
const std::complex<double> one=1.0;
const std::complex<double> zero=0.0;
// C = A*B -> fortran -> C' = B'*A',
BLAS::gemm(transa,transb, N, M, K,
one, B, LDB, A, LDA,
zero, C, LDC);
}
void product(const int M, const int N, const int K, const std::complex<double> one, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, const std::complex<double> zero, std::complex<double>* C, const int LDC )
{
const char transa = 'N';
const char transb = 'N';
// C = A*B -> fortran -> C' = B'*A',
BLAS::gemm(transa,transb, N, M, K,
one, B, LDB, A, LDA,
zero, C, LDC);
}
void product(const int M, const int N, const int K, const double* A, const int LDA, const double* B, const int LDB, double* C, const int LDC )
{
const char transa = 'N';
const char transb = 'N';
const double one=1.0;
const double zero=0.0;
// C = A*B -> fortran -> C' = B'*A',
BLAS::gemm(transa,transb, N, M, K,
one, B, LDB, A, LDA,
zero, C, LDC);
}
void product(const int M, const int N, const int K, const double one, const double* A, const int LDA, const double* B, const int LDB, const double zero, double* C, const int LDC )
{
const char transa = 'N';
const char transb = 'N';
// C = A*B -> fortran -> C' = B'*A',
BLAS::gemm(transa,transb, N, M, K,
one, B, LDB, A, LDA,
zero, C, LDC);
}
*/
// Replaces the nR x nC matrix A (leading dimension LDA) in place with the Q
// factor of its QR decomposition, computed with zgeqrf followed by zungqr.
// The matrix is copied to a column-ordered buffer for the LAPACK calls and
// copied back afterwards. Any LAPACK failure, including a failed workspace
// query, aborts through APP_ABORT.
// (A hand-written Gram-Schmidt loop that used to live here, commented out,
// has been removed.)
// NOTE(review): zungqr is called with nC columns and K = min(nR,nC)
// reflectors; confirm the nC > nR case is never requested.
void GeneralizedGramSchmidt(std::complex<double>* A, int LDA, int nR, int nC)
{
  // temporary, column-ordered copy with leading dimension nR
  std::vector<std::complex<double> > AT(nR*nC);
  for(int i=0; i<nR; i++)
    for(int j=0; j<nC; j++)
      AT[ j*nR+i ] = A[ i*LDA+j ];
  int K = std::min(nR,nC);
  std::vector<std::complex<double> > TAU(K),WORK(1);
  int info,lwork=-1;
  // Workspace query: lwork = -1 returns the optimal size in WORK[0].
  zgeqrf( &nR, &nC, AT.data(), &nR, TAU.data(), WORK.data(), &lwork, &info);
  if(info != 0) {
    app_error()<<" Problems with QR decomposition (workspace query); INFO: " <<info <<std::endl;
    APP_ABORT("Problems with QR decomposition (workspace query). \n");
  }
  lwork = int(WORK[0].real());
  WORK.resize(lwork);
  zgeqrf( &nR, &nC, AT.data(), &nR, TAU.data(), WORK.data(), &lwork, &info);
  if(info != 0) {
    app_error()<<" Problems with QR decomposition; INFO: " <<info <<std::endl;
    APP_ABORT("Problems with QR decomposition. \n");
  }
  // Build Q explicitly from the reflectors; reuses the workspace sized above.
  zungqr( &nR, &nC, &K, AT.data(), &nR, TAU.data(), WORK.data(), &lwork, &info);
  if(info != 0) {
    app_error()<<" Problems with QR decomposition (zungqr); INFO: " <<info <<std::endl;
    APP_ABORT("Problems with QR decomposition (zungqr). \n");
  }
  for(int i=0; i<nR; i++)
    for(int j=0; j<nC; j++)
      A[ i*LDA+j ] = AT[ j*nR+i ];
}
} // namespace DenseMatrixOperators
} // namespace qmcplusplus

View File

@ -0,0 +1,199 @@
#ifndef AFQMC_DENSEMATRIXOPERATORS_H
#define AFQMC_DENSEMATRIXOPERATORS_H
#include<iterator>
#include<complex>
//#include <AFQMC/Matrix/DenseMatrix.h>
#include "AFQMC/config.h"
#include "Numerics/OhmmsBlas.h"
#include "Numerics/Blasf.h"
namespace qmcplusplus
{
namespace DenseMatrixOperators
{
// True when every strictly-upper element of the N x N matrix A equals the
// conjugate of its mirrored element (exact comparison, leading dimension LDA).
// NOTE(review): diagonal elements are not inspected.
inline bool isHermitian(int N, std::complex<double>* A, int LDA)
{
  for(int row=0; row<N; ++row) {
    for(int col=row+1; col<N; ++col) {
      const bool matches = !( A[row*LDA+col] != myconj(A[col*LDA+row]) );
      if(!matches) return false;
    }
  }
  return true;
}
// Real-valued case: true when the N x N matrix A (leading dimension LDA) is
// exactly symmetric, i.e. A[i][j] == A[j][i] for every pair above the diagonal.
inline bool isHermitian(int N, double* A, int LDA)
{
  for(int row=0; row<N; ++row) {
    for(int col=row+1; col<N; ++col) {
      const double upper = A[row*LDA+col];
      const double lower = A[col*LDA+row];
      if(upper != lower) return false;
    }
  }
  return true;
}
// Matrix overload: false for non-square input, otherwise true when every
// element above the diagonal equals the conjugate of its mirrored element.
// NOTE(review): diagonal elements are not inspected.
inline bool isHermitian(Matrix<std::complex<double> >& A)
{
  if(A.rows() != A.cols()) return false;
  for(int row=0; row<A.rows(); ++row) {
    for(int col=row+1; col<A.cols(); ++col) {
      const bool matches = !( A(row,col) != myconj(A(col,row)) );
      if(!matches) return false;
    }
  }
  return true;
}
// True when the N x N complex matrix A (leading dimension LDA) is exactly
// symmetric (no conjugation): A[i][j] == A[j][i] above the diagonal.
inline bool isSymmetric(int N, std::complex<double>* A, int LDA)
{
  for(int row=0; row<N; ++row) {
    for(int col=row+1; col<N; ++col) {
      const bool same = !( A[row*LDA+col] != A[col*LDA+row] );
      if(!same) return false;
    }
  }
  return true;
}
// Matrix overload: false for non-square input, otherwise true when
// A(i,j) == A(j,i) for every pair above the diagonal.
inline bool isSymmetric(Matrix<std::complex<double> >& A)
{
  if(A.rows() != A.cols()) return false;
  for(int row=0; row<A.rows(); ++row) {
    for(int col=row+1; col<A.cols(); ++col) {
      const bool same = !( A(row,col) != A(col,row) );
      if(!same) return false;
    }
  }
  return true;
}
// In-place transpose of the leading N x N block of A (leading dimension LDA):
// every element above the diagonal is swapped with its mirror image.
template<typename T>
inline void transpose(int N, T* A, int LDA ) {
  for (int row=0; row<N; ++row) {
    T* upper = A + row*LDA;
    for (int col=row+1; col<N; ++col)
      std::swap(upper[col], A[col*LDA+row]);
  }
}
// Matrix exponential of a Hermitian matrix; result written to expA.
bool exponentiateHermitianMatrix(int N, std::complex<double>* A, int LDA, std::complex<double>* expA, int LDEXPA);
// All eigenvalues (eigVal) and eigenvectors (eigVec) of a Hermitian / real symmetric matrix.
bool symEigenSysAll(int N, std::complex<double>* A, int LDA, double* eigVal, std::complex<double>* eigVec, int LDV);
bool symEigenSysAll(int N, double* A, int LDA, double* eigVal, double* eigVec, int LDV);
// First neig eigenvalues (by index); eigenvectors only when getEigV is true.
bool symEigenSysSelect(int N, double* A, int LDA, int neig, double* eigVal, bool getEigV, double* eigVec, int LDV);
bool symEigenSysSelect(int N, std::complex<double>* A, int LDA, int neig, double* eigVal, bool getEigV, std::complex<double>* eigVec, int LDV);
// Generalized Hermitian eigenproblem for the pair (A, B); ifail is handed to the solver.
bool genHermitianEigenSysSelect(int N, std::complex<double>* A, int LDA, std::complex<double>* B, int LDB, int neig, double* eigVal, bool getEigV, std::complex<double>* eigVec, int LDV, int* ifail);
// C = A*B for row-ordered complex matrices (A: M x K, B: K x N, C: M x N).
// BLAS sees the transposed problem, so the operands are passed swapped.
inline void product(const int M, const int N, const int K, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, std::complex<double>* C, const int LDC )
{
  const std::complex<double> one=1.0;
  const std::complex<double> zero=0.0;
  // C = A*B -> fortran -> C' = B'*A',
  BLAS::gemm('N', 'N', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// C = one*A*B + zero*C for row-ordered complex matrices; `one` and `zero` are
// the caller-supplied scale factors handed to gemm.
// BLAS sees the transposed problem, so the operands are passed swapped.
inline void product(const int M, const int N, const int K, const std::complex<double> one, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, const std::complex<double> zero, std::complex<double>* C, const int LDC )
{
  // C = A*B -> fortran -> C' = B'*A',
  BLAS::gemm('N', 'N', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// C = A*B for row-ordered real matrices (A: M x K, B: K x N, C: M x N).
// BLAS sees the transposed problem, so the operands are passed swapped.
inline void product(const int M, const int N, const int K, const double* A, const int LDA, const double* B, const int LDB, double* C, const int LDC )
{
  const double one=1.0;
  const double zero=0.0;
  // C = A*B -> fortran -> C' = B'*A',
  BLAS::gemm('N', 'N', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// C = one*A*B + zero*C for row-ordered real matrices; `one` and `zero` are
// the caller-supplied scale factors handed to gemm.
// BLAS sees the transposed problem, so the operands are passed swapped.
inline void product(const int M, const int N, const int K, const double one, const double* A, const int LDA, const double* B, const int LDB, const double zero, double* C, const int LDC )
{
  // C = A*B -> fortran -> C' = B'*A',
  BLAS::gemm('N', 'N', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// C = one*A'*B + zero*C for row-ordered real matrices; `one` and `zero` are
// the caller-supplied scale factors handed to gemm.
inline void product_AhB(const int M, const int N, const int K, const double one, const double* A, const int LDA, const double* B, const int LDB, const double zero, double* C, const int LDC )
{
  // C = A'*B -> fortran -> C' = B'*A,
  BLAS::gemm('N', 'T', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// C = one*A^H*B + zero*C for row-ordered complex matrices; `one` and `zero`
// are the caller-supplied scale factors handed to gemm.
inline void product_AhB(const int M, const int N, const int K, const std::complex<double> one, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, const std::complex<double> zero, std::complex<double>* C, const int LDC )
{
  // C = A^H*B -> fortran -> C' = B'*conjg(A),
  BLAS::gemm('N', 'C', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// C = one*A*B^H + zero*C for row-ordered complex matrices; `one` and `zero`
// are the caller-supplied scale factors handed to gemm.
inline void product_ABh(const int M, const int N, const int K, const std::complex<double> one, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, const std::complex<double> zero, std::complex<double>* C, const int LDC )
{
  // C = A*B^H -> fortran -> C' = conjg(B)*A',
  BLAS::gemm('C', 'N', N, M, K, one, B, LDB, A, LDA, zero, C, LDC);
}
// Determinant of the n x n matrix x from an LU factorization (zgetrf).
// x is overwritten with the factors and pivot receives the pivot indices.
// The result is the product of the diagonal of the factored matrix, with a
// sign flip for every row whose pivot index differs from its own 1-based index.
// The status returned by zgetrf is not inspected.
inline std::complex<double>
Determinant(std::complex<double>* restrict x, int n, int* restrict pivot)
{
  int status;
  zgetrf(n,n,x,n,pivot,status);
  std::complex<double> det(1.0);
  for(int row=0; row<n; ++row)
  {
    const bool swapped = (pivot[row] != row+1);
    if(swapped)
      det *= -x[row*n+row];
    else
      det *= x[row*n+row];
  }
  return det;
}
/*
void product(const int M, const int N, const int K, const std::complex<double>, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, const std::complex<double>, std::complex<double>* C, const int LDC );
void product(const int M, const int N, const int K, const std::complex<double>* A, const int LDA, const std::complex<double>* B, const int LDB, std::complex<double>* C, const int LDC );
void product(const int M, const int N, const int K, const double* A, const int LDA, const double* B, const int LDB, double* C, const int LDC );
void product(const int M, const int N, const int K, const double, const double* A, const int LDA, const double* B, const int LDB, const double, double* C, const int LDC );
*/
// Operates in place on the nR x nC matrix A with leading dimension LDA.
void GeneralizedGramSchmidt(std::complex<double>* A, int LDA, int nR, int nC);
} // namespace DenseMatrixOperators
} // namespace qmcplusplus
#endif

View File

@ -0,0 +1,682 @@
#include<iterator>
#include<tuple>
#include<cassert>
#include "AFQMC/config.h"
#include "sys/sysinfo.h"
#if defined(HAVE_MKL)
#include "mkl.h"
#include "mkl_service.h"
#include "mkl_solvers_ee.h"
#elif defined(HAVE_ESSL)
#endif
namespace qmcplusplus
{
namespace SparseMatrixOperators
{
// Performs a product between a sparse matrix stored in s2D format (a list of
// (row, column, value) terms) and a dense matrix stored in C (row-major) format.
// K: number of columns of B/C updated for each term
// nterms: number of terms in A
// LDB, LDC: leading dimensions of B and C
//
// For each term in A, aik:
// C(i,:) += aik * B(k,:)
void product_SD(const IndexType K,
const s2D<ComplexType>* A, const int nterms,
ComplexType* B, const IndexType LDB,
ComplexType* C, IndexType LDC )
{
ComplexType aik=0;
IndexType ii=0,kk=0;
ComplexType* lit;
ComplexType* rit;
for(int cnt1=0; cnt1<nterms; cnt1++) {
std::tie(ii,kk,aik) = *(A++);
lit=C+ii*LDC;
rit=B+kk*LDB;
for(int cnt2=0; cnt2<K; cnt2++)
*(lit++) += aik*(*(rit++));
}
}
// C = A*B for a sparse matrix A and a dense vector B.
// With MKL the work is delegated to mkl_cspblas_zcsrgemv. Without it, a plain
// CSR loop is used when A.format()==0; any other format leaves C untouched.
// A.zero_base() selects whether column indices are used as-is or shifted by -1.
template<class T>
void product_SpMatV(int nrows,
                    const T& A,
                    const ComplexType* B,
                    ComplexType* C )
{
#if defined(HAVE_MKL)
  char trans = 'N';
  mkl_cspblas_zcsrgemv (&trans, &nrows, A.values() , A.row_index(), A.column_data(), B, C);
//#elif defined(HAVE_ESSL)
#else
  const ComplexType zero = ComplexType(0,0);
  const ComplexType* val = A.values();
  const int* cols = A.column_data();
  const int disp = (A.zero_base())?0:-1;
  if( A.format() == 0) { // CSR
    const int* rows = A.row_index();
    int k = 0; // running position in val/cols
    for(int r=0; r<nrows; ++r) {
      C[r] = zero;
      for(int i=rows[r]; i<rows[r+1]; ++i, ++k)
        C[r] += val[k] * B[cols[k] + disp];
    }
  } else { // ESSL: Compressed Matrix (not implemented)
  }
#endif
}
// C = alpha*A*B + beta*C for a sparse matrix A (M rows, K columns) and dense
// complex vectors B and C.
// With MKL the work is delegated to mkl_zcsrmv. Without it, a plain CSR loop
// is used when A.format()==0; any other format leaves C untouched.
// A.zero_base() selects whether column indices are used as-is or shifted by -1.
template<class T>
void product_SpMatV(const int M, const int K,
                    const ComplexType& alpha,
                    const T& A,
                    const ComplexType* B,
                    const ComplexType& beta,
                    ComplexType* C )
{
#if defined(HAVE_MKL)
  char trans = 'N';
  char matdes[6];
  matdes[0] = 'G';
  matdes[3] = 'C';
  mkl_zcsrmv( &trans, &M, &K, &alpha, matdes, A.values() , A.column_data(), A.row_index() , A.row_index()+1, B, &beta, C );
//#elif defined(HAVE_ESSL)
#else
  const ComplexType* val = A.values();
  const int* cols = A.column_data();
  const int disp = (A.zero_base())?0:-1;
  if( A.format() == 0) { // CSR
    const int* rows = A.row_index();
    int k = 0; // running position in val/cols
    for(int r=0; r<M; ++r) {
      C[r] *= beta;
      for(int i=rows[r]; i<rows[r+1]; ++i, ++k)
        C[r] += alpha * val[k] * B[cols[k] + disp];
    }
  } else { // ESSL: Compressed Matrix (not implemented)
  }
#endif
}
// C = alpha*A*B + beta*C for a sparse matrix A (M rows, K columns) and dense
// real vectors B and C.
// With MKL the work is delegated to mkl_dcsrmv. Without it, a plain CSR loop
// is used when A.format()==0; any other format leaves C untouched.
// A.zero_base() selects whether column indices are used as-is or shifted by -1.
template<class T>
void product_SpMatV(const int M, const int K,
                    const RealType& alpha,
                    const T& A,
                    const RealType* B,
                    const RealType& beta,
                    RealType* C )
{
#if defined(HAVE_MKL)
  char trans = 'N';
  char matdes[6];
  matdes[0] = 'G';
  matdes[3] = 'C';
  mkl_dcsrmv( &trans, &M, &K, &alpha, matdes, A.values() , A.column_data(), A.row_index() , A.row_index()+1, B, &beta, C );
//#elif defined(HAVE_ESSL)
#else
  const RealType* val = A.values();
  const int* cols = A.column_data();
  const int disp = (A.zero_base())?0:-1;
  if( A.format() == 0) { // CSR
    const int* rows = A.row_index();
    int k = 0; // running position in val/cols
    for(int r=0; r<M; ++r) {
      C[r] *= beta;
      for(int i=rows[r]; i<rows[r+1]; ++i, ++k)
        C[r] += alpha * val[k] * B[cols[k] + disp];
    }
  } else { // ESSL: Compressed Matrix (not implemented)
  }
#endif
}
// C = alpha*A*B + beta*C for a real sparse matrix given as raw CSR arrays
// (val, col, row) with M rows and K columns. Thin wrapper over mkl_dcsrmv;
// aborts when built without MKL.
void product_SpMatV(const int M, const int K,
                    const RealType& alpha,
                    const RealType* val,
                    const int* col,
                    const int* row,
                    const RealType* B,
                    const RealType& beta,
                    RealType* C )
{
#if defined(HAVE_MKL)
  // descriptor: general matrix ('G'), zero-based indexing ('C')
  char descr[6];
  descr[0] = 'G';
  descr[3] = 'C';
  char op = 'N';
  mkl_dcsrmv( &op, &M, &K, &alpha, descr, val , col, row , row+1, B, &beta, C );
#else
  APP_ABORT("ERROR: product_SpMatV only implemented with MKL. \n");
#endif
}
// Sparse matrix-vector product on raw CSR arrays (complex):
// forwards to mkl_zcsrmv with trans = 'N'.
//   val/col : non-zero values and their column indices
//   row     : row start offsets; row+1 is passed as the row end offsets
// Aborts when the code is built without MKL.
void product_SpMatV(const int M, const int K,
             const ComplexType& alpha,
             const ComplexType* val,
             const int* col,
             const int* row,
             const ComplexType* B,
             const ComplexType& beta,
             ComplexType* C )
{
#if defined(HAVE_MKL)
  // Only entries 0 and 3 of the descriptor are filled in.
  char descr[6];
  descr[3] = 'C';
  descr[0] = 'G';
  char op = 'N';
  mkl_zcsrmv( &op, &M, &K, &alpha, descr,
              val , col,
              row , row+1,
              B, &beta, C );
#else
  APP_ABORT("ERROR: product_SpMatV only implemented with MKL. \n");
#endif
}
void product_SpMatTV(const int M, const int K,
const ComplexType& alpha,
const ComplexType* val,
const int* col,
const int* row,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C )
{
#if defined(HAVE_MKL)
char trans = 'T';
char matdes[6];
matdes[0] = 'G';
matdes[3] = 'C';
mkl_zcsrmv( &trans, &M, &K, &alpha, matdes, val , col, row , row+1, B, &beta, C );
#else
APP_ABORT("ERROR: product_SpMatV only implemented with MKL. \n");
#endif
}
// Transposed sparse matrix-vector product (complex) for a sparse matrix
// object: forwards to mkl_zcsrmv with trans = 'T'.
//   M, K  : dimensions handed to MKL
//   A     : sparse matrix exposing values()/column_data()/row_index()
//   B, C  : dense input / in-out vectors
// There is no portable fallback: without MKL the routine aborts. The code
// that used to follow the abort was unreachable and, being a copy of the
// non-transposed loop, would have computed A*B rather than A^T*B.
template<class T>
void product_SpMatTV(const int M, const int K,
             const ComplexType& alpha,
             const T& A,
             const ComplexType* B,
             const ComplexType& beta,
             ComplexType* C )
{
#if defined(HAVE_MKL)
  char trans = 'T';
  // Only entries 0 and 3 of the descriptor are filled in.
  char matdes[6];
  matdes[0] = 'G';
  matdes[3] = 'C';
  mkl_zcsrmv( &trans, &M, &K, &alpha, matdes, A.values() , A.column_data(), A.row_index() , A.row_index()+1, B, &beta, C );
//#elif defined(HAVE_ESSL)
#else
  APP_ABORT("ERROR: product_SpMatTV only implemented with MKL. \n");
#endif
}
// Sparse-matrix times dense-matrix product (complex): forwards to
// mkl_zcsrmm with trans = 'N'.
//   M, N, K  : dimensions handed to MKL
//   A        : sparse matrix exposing values()/column_data()/row_index()
//   B, ldb   : dense input matrix and its leading dimension
//   C, ldc   : dense in-out matrix and its leading dimension
// Aborts when the code is built without MKL.
template<class T>
void product_SpMatM(const int M, const int N, const int K,
             const ComplexType& alpha,
             const T& A,
             const ComplexType* B, const int ldb,
             const ComplexType& beta,
             ComplexType* C, int ldc )
{
#if defined(HAVE_MKL)
  // Only entries 0 and 3 of the descriptor are filled in.
  char descr[6];
  descr[3] = 'C';
  descr[0] = 'G';
  char op = 'N';
  mkl_zcsrmm( &op, &M, &N, &K, &alpha, descr,
              A.values() , A.column_data(),
              A.row_index() , A.row_index()+1,
              B, &ldb, &beta, C, &ldc );
#else
  APP_ABORT("ERROR: product_SpMatM only implemented with MKL. \n");
#endif
}
// Sparse-matrix times dense-matrix product (real): forwards to
// mkl_dcsrmm with trans = 'N'.
//   M, N, K  : dimensions handed to MKL
//   A        : sparse matrix exposing values()/column_data()/row_index()
//   B, ldb   : dense input matrix and its leading dimension
//   C, ldc   : dense in-out matrix and its leading dimension
// Aborts when the code is built without MKL.
template<class T>
void product_SpMatM(const int M, const int N, const int K,
             const RealType& alpha,
             const T& A,
             const RealType* B, const int ldb,
             const RealType& beta,
             RealType* C, int ldc )
{
#if defined(HAVE_MKL)
  // Only entries 0 and 3 of the descriptor are filled in.
  char descr[6];
  descr[3] = 'C';
  descr[0] = 'G';
  char op = 'N';
  mkl_dcsrmm( &op, &M, &N, &K, &alpha, descr,
              A.values() , A.column_data(),
              A.row_index() , A.row_index()+1,
              B, &ldb, &beta, C, &ldc );
#else
  APP_ABORT("ERROR: product_SpMatM only implemented with MKL. \n");
#endif
}
// Sparse-matrix times dense-matrix product (single precision): forwards to
// mkl_scsrmm with trans = 'N'.
//   M, N, K  : dimensions handed to MKL
//   A        : sparse matrix exposing values()/column_data()/row_index()
//   B, ldb   : dense input matrix and its leading dimension
//   C, ldc   : dense in-out matrix and its leading dimension
// Aborts when the code is built without MKL.
template<class T>
void product_SpMatM(const int M, const int N, const int K,
             const float& alpha,
             const T& A,
             const float* B, const int ldb,
             const float& beta,
             float* C, int ldc )
{
#if defined(HAVE_MKL)
  // Only entries 0 and 3 of the descriptor are filled in.
  char descr[6];
  descr[3] = 'C';
  descr[0] = 'G';
  char op = 'N';
  mkl_scsrmm( &op, &M, &N, &K, &alpha, descr,
              A.values() , A.column_data(),
              A.row_index() , A.row_index()+1,
              B, &ldb, &beta, C, &ldc );
#else
  APP_ABORT("ERROR: product_SpMatM only implemented with MKL. \n");
#endif
}
// Performs a product between a sparse matrix stored in format s2D and a dense
// matrix stored in c format
// N: number of rows in B/C
// M: number of columns in B/C
// LDB: leading dimension of B
//
// For eack term in A, aik
// C(i,:) += aik * B(k.:)
void product_SD(const IndexType K,
const s2D<RealType>* A, const int nterms,
ComplexType* B, const IndexType LDB,
ComplexType* C, IndexType LDC )
{
register RealType aik;
register IndexType ii,kk;
int cnt1,cnt2;
ComplexType* lit;
ComplexType* rit;
for(cnt1=0; cnt1<nterms; cnt1++,A++) {
std::tie(ii,kk,aik) = *A;
for(cnt2=0,lit=C+ii*LDC,rit=B+kk*LDB; cnt2<K; cnt2++,lit++,rit++)
*lit += aik*(*rit);
}
}
// Dot product of two sparse vectors.
//   n1, indx1, A1 : number of stored entries, their indices and values (first vector)
//   n2, indx2, A2 : same for the second vector
// Both index arrays are walked with a two-pointer merge, advancing whichever
// side currently has the smaller index; this only pairs up matching indices
// correctly when each index array is in ascending order.
// Returns the sum of A1[i]*A2[j] over all pairs with indx1[i] == indx2[j].
template<class T>
T product_SpVSpV(const int n1,const int* indx1, const T* A1, const int n2, const int* indx2, const T* A2) {
  T dot = T(0);
  int p = 0;
  int q = 0;
  while( p<n1 && q<n2 ) {
    const int left = indx1[p];
    const int right = indx2[q];
    if( left < right ) {
      ++p;
    } else if( right < left ) {
      ++q;
    } else {
      // same index on both sides: contributes to the dot product
      dot += A1[p] * A2[q];
      ++p;
      ++q;
    }
  }
  return dot;
}
// Computes eigenpairs of a real sparse matrix with the MKL Extended
// Eigensolver routine dfeast_scsrev.
//   A      : square, compressed sparse matrix
//   m0     : in: requested subspace size; out: number of eigenvalues found
//   eigval : output eigenvalues
//   eigVec : in/out eigenvectors
//   Emin   : lower bound of the search interval
// Returns false if A is not square, not compressed, or the solver reports
// an error; true otherwise. Aborts when built without MKL.
//
// NOTE(review): this routine still carries debugging overrides. m0, Emin
// and Emax are overwritten with hard-coded values (10, 1.0, 2.0) before the
// solver is called, so the caller's m0 and Emin are ignored and eigval /
// eigVec must be able to hold 10 eigenpairs. Confirm the intended values
// before relying on this overload.
bool sparseEigenSystem(RealSpMat &A, int& m0, RealType *eigval, RealType* eigVec, double Emin )
{

  if(A.cols() != A.rows()) {
    std::cerr<<"Problems in sparseEigenSystem: A matrix not squared. \n" <<std::endl;
    return false;
  }

  if(!A.isCompressed()) {
    std::cerr<<"Problems in sparseEigenSystem: A matrix not compressed. \n" <<std::endl;
    return false;
  }

#if defined(HAVE_MKL)

  char UPLO('F');
  int N = A.rows();

  /* Declaration of FEAST variables */
  MKL_INT fpm[128];     /* Array to pass parameters to Intel MKL Extended Eigensolvers */
  double Emax = 100;    /* Upper bound of search interval [Emin,Emax] */

  double epsout;        /* Relative error of the trace */
  MKL_INT loop;         /* Number of refinement loop */
  MKL_INT M;            /* Total number of eigenvalues found in the interval */

  /* Declaration of local variables */
  MKL_INT info;         /* Errors */

  /* Step 1. Call FEASTINIT to define the default values for the input FEAST parameters */
  feastinit(
      fpm /* OUT: Array is used to pass parameters to Intel MKL Extended Eigensolvers */
      );

  // NOTE(review): hard-coded debugging values, see the header comment.
  m0 = 10;
  M = 10;
  loop = 0;
  info = 0;
  epsout = 0.0;
  Emin = 1.0;
  Emax = 2.0;

  // The residual array must hold m0 entries. It is allocated only now, after
  // m0 has its final value; it used to be sized from the caller's m0 before
  // the override above, which let the solver write past its end whenever the
  // caller passed m0 < 10.
  std::vector<double> res(m0);

  std::cout<<"\nEntering dfeast_scsrev routine." <<std::endl;
  std::cout<<"Default subspace size: " <<fpm[4] <<std::endl;
  std::cout<<"Problem size: " <<N <<std::endl;
  std::cout<<"Available memory: ";

  struct sysinfo si;
  sysinfo(&si);
  si.freeram+=si.bufferram;
  std::cout<<int(si.freeram>>20) <<std::endl;

  fpm[0] = 1;
  fpm[4] = 1;

  /* Step 2. Solve the standard Ax = ex eigenvalue problem. */
  dfeast_scsrev(
      &UPLO,           /* IN: UPLO = 'F', stores the full matrix */
      &N,              /* IN: Size of the problem */
      A.values(),      /* IN: CSR matrix A, values of non-zero elements */
      A.row_data(),    /* IN: CSR matrix A, index of the first non-zero element in row */
      A.column_data(), /* IN: CSR matrix A, columns indices for each non-zero element */
      fpm,             /* IN/OUT: Array is used to pass parameters to Intel MKL Extended Eigensolvers */
      &epsout,         /* OUT: Relative error of on the trace */
      &loop,           /* OUT: Contains the number of refinement loop executed */
      &Emin,           /* IN: Lower bound of search interval */
      &Emax,           /* IN: Upper bound of search interval */
      &m0,             /* IN: The initial guess for subspace dimension to be used. */
      eigval,          /* OUT: The first M entries of Eigenvalues */
      eigVec,          /* IN/OUT: The first M entries of Eigenvectors */
      &M,              /* OUT: The total number of eigenvalues found in the interval */
      res.data(),      /* OUT: The first M components contain the relative residual vector */
      &info            /* OUT: Error code */
      );
  if ( info != 0 )
  {
    std::cerr<<"Routine dfeast_scsrev returns code of ERROR: " <<info <<std::endl;
    std::cout<<"Routine dfeast_scsrev returns code of ERROR: " <<info <<std::endl;
    return false;
  }
  // report the number of eigenvalues actually found
  m0 = M;
  return true;
#else
  APP_ABORT("Error: sparseEigenSystem only implemented with MKL library. \n");
  return false;
#endif

}
// Computes eigenpairs of a complex sparse matrix with the MKL Extended
// Eigensolver routine zfeast_hcsrev.
//   A      : square, compressed sparse matrix
//   m0     : in: subspace size handed to the solver; out: number of
//            eigenvalues found
//   eigval : output eigenvalues
//   eigVec : in/out eigenvectors
//   Emin   : lower bound of the search interval (the upper bound is fixed
//            at 1e6 inside this routine)
// Returns false if A is not square, not compressed, or the solver reports
// an error; true otherwise. Aborts when built without MKL.
bool sparseEigenSystem(ComplexSpMat &A, int& m0, RealType *eigval, ComplexType* eigVec, double Emin )
{

  if(A.cols() != A.rows()) {
    std::cerr<<"Problems in sparseEigenSystem: A matrix not squared. \n" <<std::endl;
    return false;
  }

  if(!A.isCompressed()) {
    std::cerr<<"Problems in sparseEigenSystem: A matrix not compressed. \n" <<std::endl;
    return false;
  }

#if defined(HAVE_MKL)

  char UPLO('F');
  int N = A.rows();

  /* Declaration of FEAST variables */
  MKL_INT fpm[128];     /* Array to pass parameters to Intel MKL Extended Eigensolvers */
  double Emax = 1e6;    /* Upper bound of search interval [Emin,Emax] */

  double epsout;        /* Relative error of the trace */
  MKL_INT loop;         /* Number of refinement loop */
  MKL_INT M;            /* Total number of eigenvalues found in the interval */

  /* Declaration of local variables */
  MKL_INT info;         /* Errors */
  std::vector<double> res(m0);   /* one residual per subspace vector */

  /* Step 1. Call FEASTINIT to define the default values for the input FEAST parameters */
  feastinit(
      fpm /* OUT: Array is used to pass parameters to Intel MKL Extended Eigensolvers */
      );

  std::cout<<"Entering zfeast_hcsrev routine. \n" <<std::endl;

  /* Step 2. Solve the standard Ax = ex eigenvalue problem. */
  zfeast_hcsrev(
      &UPLO,           /* IN: UPLO = 'F', stores the full matrix */
      &N,              /* IN: Size of the problem */
      A.values(),      /* IN: CSR matrix A, values of non-zero elements */
      A.row_data(),    /* IN: CSR matrix A, index of the first non-zero element in row */
      A.column_data(), /* IN: CSR matrix A, columns indices for each non-zero element */
      fpm,             /* IN/OUT: Array is used to pass parameters to Intel MKL Extended Eigensolvers */
      &epsout,         /* OUT: Relative error of on the trace */
      &loop,           /* OUT: Contains the number of refinement loop executed */
      &Emin,           /* IN: Lower bound of search interval */
      &Emax,           /* IN: Upper bound of search interval */
      &m0,             /* IN: The initial guess for subspace dimension to be used. */
      eigval,          /* OUT: The first M entries of Eigenvalues */
      eigVec,          /* IN/OUT: The first M entries of Eigenvectors */
      &M,              /* OUT: The total number of eigenvalues found in the interval */
      res.data(),      /* OUT: The first M components contain the relative residual vector */
      &info            /* OUT: Error code */
      );
  if ( info != 0 )
  {
    std::cerr<<"Routine zfeast_hcsrev returns code of ERROR: " <<info <<std::endl;
    return false;
  }
  // report the number of eigenvalues actually found
  m0 = M;
  return true;
#else
  // The message used to end in a literal " n" where a newline was intended.
  APP_ABORT("Error: sparseEigenSystem only implemented with MKL library. \n");
  return false;
#endif

}
// Explicit template instantiations.
// The templates above are defined in this translation unit, so every
// matrix-type / scalar-type combination listed here is instantiated
// explicitly for use from other translation units.

// product_SpMatV, four-argument form
template
void product_SpMatV<ComplexSpMat>(const int nrows, const ComplexSpMat& A, const ComplexType* B, ComplexType* C);
template
void product_SpMatV<ComplexSMSpMat>(const int nrows, const ComplexSMSpMat& A, const ComplexType* B, ComplexType* C);
// alpha/beta forms for ComplexSpMat and RealSpMat
template
void product_SpMatV<ComplexSpMat>(const int M, const int K,
const ComplexType& alpha,
const ComplexSpMat& A,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
template
void product_SpMatTV<ComplexSpMat>(const int M, const int K,
const ComplexType& alpha,
const ComplexSpMat& A,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
template
void product_SpMatM<ComplexSpMat>(const int M, const int N, const int K,
const ComplexType& alpha,
const ComplexSpMat& A,
const ComplexType* B, int ldb,
const ComplexType& beta,
ComplexType* C, int ldc );
template
void product_SpMatM<RealSpMat>(const int M, const int N, const int K,
const RealType& alpha,
const RealSpMat& A,
const RealType* B, int ldb,
const RealType& beta,
RealType* C, int ldc );
// alpha/beta forms for the SM* matrix types
template
void product_SpMatV<RealSMSpMat>(const int M, const int K,
const RealType& alpha,
const RealSMSpMat& A,
const RealType* B,
const RealType& beta,
RealType* C );
template
void product_SpMatV<ComplexSMSpMat>(const int M, const int K,
const ComplexType& alpha,
const ComplexSMSpMat& A,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
template
void product_SpMatTV<ComplexSMSpMat>(const int M, const int K,
const ComplexType& alpha,
const ComplexSMSpMat& A,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
template
void product_SpMatM<ComplexSMSpMat>(const int M, const int N, const int K,
const ComplexType& alpha,
const ComplexSMSpMat& A,
const ComplexType* B, int ldb,
const ComplexType& beta,
ComplexType* C, int ldc );
template
void product_SpMatM<RealSMSpMat>(const int M, const int N, const int K,
const RealType& alpha,
const RealSMSpMat& A,
const RealType* B, int ldb,
const RealType& beta,
RealType* C, int ldc );
// single-precision sparse-times-dense product
template
void product_SpMatM<SMSparseMatrix<float>>(const int M, const int N, const int K,
const float& alpha,
const SMSparseMatrix<float>& A,
const float* B, int ldb,
const float& beta,
float* C, int ldc );
// sparse-vector dot products
template
ComplexType product_SpVSpV(const int n1, const int* indx1, const ComplexType* A1, const int n2, const int* indx2, const ComplexType* A2);
template
RealType product_SpVSpV(const int n1, const int* indx1, const RealType* A1, const int n2, const int* indx2, const RealType* A2);
} // namespace SparseMatrixOperators
} // namespace qmcplusplus

View File

@ -0,0 +1,149 @@
#ifndef AFQMC_SPARSEMATRIXOPERATORS_H
#define AFQMC_SPARSEMATRIXOPERATORS_H
#include<iterator>
#include<tuple>
#include "AFQMC/config.h"
namespace qmcplusplus
{
namespace SparseMatrixOperators
{
// Product between a sparse matrix stored as a list of s2D terms and a dense
// matrix stored in C (row-major) format.
// K: number of consecutive elements of B/C processed per term
// nterms: number of terms in A
// LDB, LDC: leading dimensions of B and C
//
// For each term in A, aik
// C(i,:) += aik * B(k,:)
// NOTE(review): semantics described from the s2D<RealType> overload; the
// definition of this s2D<ComplexType> overload was not checked -- confirm.
void product_SD(const IndexType K,
const s2D<ComplexType>* A, const int nterms,
ComplexType* B, const IndexType LDB,
ComplexType* C, IndexType LDC );
// Sparse matrix-vector product without scaling factors.
// NOTE(review): the explicit instantiations in the implementation file use
// the signature (const int, const T&, const ComplexType*, ComplexType*);
// confirm that this declaration matches the definition.
template<class T>
void product_SpMatV(int nrows,
T& A,
ComplexType* B,
ComplexType* C );
// C := alpha*A*B + beta*C for a sparse matrix object A (complex).
template<class T>
void product_SpMatV(const int M, const int K,
const ComplexType& alpha,
const T& A,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
// C := alpha*A*B + beta*C for a sparse matrix object A (real).
template<class T>
void product_SpMatV(const int M, const int K,
const RealType& alpha,
const T& A,
const RealType* B,
const RealType& beta,
RealType* C );
// Same product on raw CSR arrays (real): val = values, col = column
// indices, row = row offsets.
void product_SpMatV(const int M, const int K,
const RealType& alpha,
const RealType* val,
const int* col,
const int* row,
const RealType* B,
const RealType& beta,
RealType* C );
// Same product on raw CSR arrays (complex).
void product_SpMatV(const int M, const int K,
const ComplexType& alpha,
const ComplexType* val,
const int* col,
const int* row,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
// Transposed sparse matrix-vector product on raw CSR arrays (complex).
void product_SpMatTV(const int M, const int K,
const ComplexType& alpha,
const ComplexType* val,
const int* col,
const int* row,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
// Transposed sparse matrix-vector product for a sparse matrix object A.
template<class T>
void product_SpMatTV(const int M, const int K,
const ComplexType& alpha,
const T& A,
const ComplexType* B,
const ComplexType& beta,
ComplexType* C );
// Sparse matrix times dense matrix (complex); ldb/ldc are the leading
// dimensions of B and C.
template<class T>
void product_SpMatM(const int M, const int N, const int K,
const ComplexType& alpha,
const T& A,
const ComplexType* B, const int ldb,
const ComplexType& beta,
ComplexType* C, int ldc );
// Sparse matrix times dense matrix (real).
template<class T>
void product_SpMatM(const int M, const int N, const int K,
const RealType& alpha,
const T& A,
const RealType* B, const int ldb,
const RealType& beta,
RealType* C, int ldc );
// Sparse matrix times dense matrix (single precision).
template<class T>
void product_SpMatM(const int M, const int N, const int K,
const float& alpha,
const T& A,
const float* B, const int ldb,
const float& beta,
float* C, int ldc );
// Product between a sparse matrix stored as a list of s2D terms (real
// coefficients) and a dense matrix stored in C (row-major) format.
// K: number of consecutive elements of B/C processed per term
// nterms: number of terms in A
// LDB, LDC: leading dimensions of B and C
//
// For each term in A, aik
// C(i,:) += aik * B(k,:)
void product_SD(const IndexType K,
const s2D<RealType>* A, const int nterms,
ComplexType* B, const IndexType LDB,
ComplexType* C, IndexType LDC );
// Dot product between 2 sparse vectors given as (index, value) arrays
template<class T>
T product_SpVSpV(const int n1, const int* indx1, const T* A1, const int n2, const int* indx2, const T* A2);
// Builds in AT the transpose of the sparse matrix A.
// AT is cleared, given A's dimensions swapped and room for A.size()
// entries; the entries are then copied with the row and column index
// arrays exchanged, and AT.compress() is called on the result.
template<class T>
inline void transpose_SpMat(const T& A, T& AT)
{
  // start from an empty matrix of shape cols x rows
  AT.clear();
  AT.setDims(A.cols(),A.rows());
  AT.resize_arrays(A.size());
  const int nnz = A.size();
  // A's columns become AT's rows and vice versa; values are unchanged
  std::copy(A.column_data(), A.column_data()+nnz, AT.row_data());
  std::copy(A.row_data(), A.row_data()+nnz, AT.column_data());
  std::copy(A.values(), A.values()+nnz, AT.values());
  AT.compress();
}
// Sparse eigenvalue solvers (complex and real matrix overloads).
// A: sparse matrix; m0: subspace size in, number of eigenvalues found out;
// eigval/eigVec: output eigenvalues and eigenvectors; Emin: lower bound of
// the search interval. Both return a bool success flag.
bool sparseEigenSystem(ComplexSpMat &A, int& m0, RealType *eigval, ComplexType* eigVec, double Emin );
bool sparseEigenSystem(RealSpMat &A, int& m0, RealType *eigval, RealType* eigVec, double Emin );
} // namespace SparseMatrixOperators
} // namespace qmcplusplus
#endif

View File

@ -0,0 +1,97 @@
#ifndef QMCPLUSPLUS_AFQMC_PROPAGATORBASE_H
#define QMCPLUSPLUS_AFQMC_PROPAGATORBASE_H
#include<fstream>
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
//#include "AFQMC/Walkers/SlaterDetWalker.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "Utilities/RandomGenerator.h"
#include "AFQMC/Estimators/SlaterDetOperations.h"
namespace qmcplusplus
{
// Abstract interface shared by all AFQMC propagators.
// Concrete propagators implement parsing of their XML section, setup,
// HDF5 read/write of their data, and the propagation step itself.
class PropagatorBase: public MPIObjectBase, public AFQMCInfo
{

  public:

  // c: communicator handed to MPIObjectBase and to the task group
  // r: random number generator used by the propagator (not owned here)
  // The initializer list follows the member declaration order and gives
  // every plain pointer/flag/number a defined start value. Only
  // MPI_COMM_HEAD_OF_NODES is left unset; it is assigned by setHeadComm().
  PropagatorBase(Communicate *c, RandomGenerator_t* r):
    MPIObjectBase(c),
    SDetOps(nullptr),
    dt(0),
    TG(c,"PropagatorTG"),
    ncores_per_TG(1),nnodes_per_TG(1),
    core_rank(0),
    parallelPropagation(false),
    distributeSpvn(false),
    parallel_factorization(true),
    head_of_nodes(false),
    rng(r),
    Order_Taylor_Expansion(6),
    name(""),
    hdf_write_tag(""),hdf_write_file(""),
    hdf_read_tag(""),hdf_read_file("")
  {
  }

  // Virtual so that propagators can be destroyed safely through a
  // PropagatorBase pointer.
  virtual ~PropagatorBase() {}

//  virtual void Propagate(int n, SlaterDetWalker&, RealType& E1, const RealType E2=0)=0;

  // Propagation step applied to the walkers held by the handler.
  virtual void Propagate(int n, WalkerHandlerBase*, RealType& E1, const RealType E2=0)=0;

  // Reads the propagator's XML section.
  virtual bool parse(xmlNodePtr)=0;

  // Prepares the propagator for a run with the given time step.
  virtual bool setup(std::vector<int>&,ComplexSMVector*,HamiltonianBase*,WavefunctionHandler*, RealType dt, hdf_archive&, const std::string&, MPI_Comm, MPI_Comm)=0;

  // Write / read the propagator data to / from an HDF5 archive.
  virtual bool hdf_write(hdf_archive&, const std::string&)=0;

  virtual bool hdf_read(hdf_archive&,const std::string&)=0;

  virtual void benchmark()=0;

  // Records whether this rank is the head of its node and the communicator
  // connecting the heads of nodes.
  void setHeadComm(bool hd, MPI_Comm comm) {
    head_of_nodes=hd;
    MPI_COMM_HEAD_OF_NODES = comm;
  }

  SlaterDetOperations* SDetOps;

  // timestep
  RealType dt;

  TaskGroup TG;
  int ncores_per_TG,nnodes_per_TG;
  int core_rank;
  bool parallelPropagation;
  bool distributeSpvn;

  bool parallel_factorization;

  bool head_of_nodes;
  MPI_Comm MPI_COMM_HEAD_OF_NODES;

  // used to sort snD values using only indexes
  _mySort_snD_ mySort;

  RandomGenerator_t* rng;

  int Order_Taylor_Expansion;

  // name of object
  std::string name;

  // id of HDF group of this object
  // The actual datasets will be stored on:
  //   /Propagators/ACTUAL_PROPAGATOR/hdf_tag
  std::string hdf_write_tag;

  // hdf file where data will be stored. if a filename is found
  // on the xml section of the propagator, it will be used.
  // otherwise the one from the driver will be used.
  std::string hdf_write_file;

  std::string hdf_read_tag;

  std::string hdf_read_file;

};
}
#endif

View File

@ -0,0 +1,373 @@
#include "Configuration.h"
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "OhmmsData/libxmldefs.h"
#include "io/hdf_archive.h"
#include "Message/CommOperators.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Wavefunctions/WavefunctionBase.h"
#include "AFQMC/Wavefunctions/PureSingleDeterminant.h"
#include "AFQMC/Walkers/SlaterDetWalker.h"
#include "AFQMC/Propagators/VMCPropagator.h"
#include "AFQMC/Hamiltonians/ProjectorBase.h"
#include "AFQMC/Hamiltonians/DDProjector.h"
#include "AFQMC/Hamiltonians/CCProjector.h"
#include"AFQMC/Numerics/SparseMatrixOperations.h"
#include"AFQMC/Numerics/DenseMatrixOperations.h"
#include "Utilities/RandomGenerator.h"
namespace qmcplusplus
{
// Reads the XML section of the propagator.
//   cur : root node of the propagator section
// The "name" attribute and the hdf_read/hdf_write tag and file parameters
// are read from the root node. Each child element named "projector" selects
// the projector implementation through its "type" attribute (default "DD")
// and is handed to the new projector for parsing.
// Returns false if cur is NULL or a projector element has an unknown type;
// true otherwise.
bool VMCPropagator::parse(xmlNodePtr cur)
{

  if(cur == NULL)
    return false;

  xmlNodePtr curRoot=cur;
  OhmmsAttributeSet oAttrib;
  oAttrib.add(name,"name");
  oAttrib.put(cur);

  ParameterSet m_param;

  // hdf
  m_param.add(hdf_read_tag,"hdf_read_tag","std::string");
  m_param.add(hdf_read_file,"hdf_read_file","std::string");
  m_param.add(hdf_write_tag,"hdf_write_tag","std::string");
  m_param.add(hdf_write_file,"hdf_write_file","std::string");

  m_param.put(cur);

  cur = curRoot->children;
  while (cur != NULL) {
    std::string cname((const char*)(cur->name));
    if(cname =="projector") {
      std::string type("DD");
      OhmmsAttributeSet projAttrib;
      projAttrib.add(type,"type");
      projAttrib.put(cur);

      if(type=="DD" || type=="Gutzwiller" || type=="dd" || type == "gutzwiller" || type=="DDProjector" )
      {
        proj0 = new DDProjector(myComm);
        proj0->parse(cur);
      } else if(type=="CC" || type=="cc" || type=="cluster" || type=="Cluster"  )
      {
        proj0 = new CCProjector(myComm);
        proj0->parse(cur);
      } else {
        // An unknown type used to be logged only, after which parse() still
        // reported success without having created a projector for it.
        std::cerr<<"Unknown projector type: " <<type <<std::endl;
        return false;
      }
    }
    cur = cur->next;
  }

  return true;

}
// Writes Spvn to the archive under /Propagators/VMCPropagator[/tag].
//   dump : open HDF5 archive
//   tag  : optional sub-group name; an empty string means no sub-group
// Stored datasets: Spvn_dims (size, rows, cols, NMO), Spvn_vals, Spvn_rows,
// Spvn_cols and Spvn_rowIndex.
// Returns false, writing nothing, if the target group already exists.
bool VMCPropagator::hdf_write(hdf_archive& dump,const std::string& tag)
{
  const bool hasTag = !tag.empty();

  // refuse to overwrite an existing group
  std::string groupPath("/Propagators/VMCPropagator");
  if(hasTag) {
    groupPath += "/";
    groupPath += tag;
  }
  if(dump.is_group( groupPath )) {
    app_error()<<" ERROR: H5Group /Propagators/VMCPropagator/{tag} already exists in restart file. This is a bug and should not happen. Contact a developer.\n";
    return false;
  }

  std::vector<int> dims(4);
  dims[0]=Spvn.size();
  dims[1]=Spvn.rows();
  dims[2]=Spvn.cols();
  dims[3]=NMO;

  // enter the group hierarchy
  dump.push("Propagators");
  dump.push("VMCPropagator");
  if(hasTag) dump.push(tag);

  dump.write(dims,"Spvn_dims");
  dump.write(*(Spvn.getVals()),"Spvn_vals");
  dump.write(*(Spvn.getRows()),"Spvn_rows");
  dump.write(*(Spvn.getCols()),"Spvn_cols");
  dump.write(*(Spvn.getRowIndex()),"Spvn_rowIndex");

  // leave the group hierarchy again
  if(hasTag) dump.pop();
  dump.pop();
  dump.pop();

  dump.flush();

  return true;
}
// Reads Spvn from the archive group /Propagators/VMCPropagator[/tag].
//   dump : open HDF5 archive
//   tag  : optional sub-group name; an empty string means no sub-group
// Returns false as soon as a group or dataset cannot be opened/read, or if
// the stored NMO differs from the current one. On such an early return the
// groups pushed so far are not popped.
bool VMCPropagator::hdf_read(hdf_archive& dump,const std::string& tag)
{
  const bool hasTag = !tag.empty();
  std::vector<int> dims(4);

  // enter the group hierarchy without creating missing groups
  if(!dump.push("Propagators",false)) return false;
  if(!dump.push("VMCPropagator",false)) return false;
  if(hasTag && !dump.push(tag,false)) return false;

  // dims = (size, rows, cols, NMO)
  if(!dump.read(dims,"Spvn_dims")) return false;
  if(dims[3] != NMO) {
    app_error()<<" Error in VMCPropagator::hdf_read. NMO is not consistent between hdf5 file and current run. \n";
    return false;
  }

  Spvn.setDims(dims[1],dims[2]);
  Spvn.allocate_serial(dims[0]);
  Spvn.resize_serial(dims[0]);

  if(!dump.read(*(Spvn.getVals()),"Spvn_vals")) return false;
  if(!dump.read(*(Spvn.getRows()),"Spvn_rows")) return false;
  if(!dump.read(*(Spvn.getCols()),"Spvn_cols")) return false;
  if(!dump.read(*(Spvn.getRowIndex()),"Spvn_rowIndex")) return false;

  // leave the group hierarchy again
  if(hasTag) dump.pop();
  dump.pop();
  dump.pop();

  // check that everything went fine
  return true;
}
bool VMCPropagator::setup(std::vector<int>& TGdata, ComplexSMVector *v, HamiltonianBase* ham,WavefunctionHandler* w, RealType dt_, hdf_archive& dump_read, const std::string& hdf_restart_tag,MPI_Comm tg_comm, MPI_Comm node_comm)
{
dt = dt_;
proj0->copyInfo(*this);
bool read_Spvn_from_file=false;
Spvn.setup(head_of_nodes,"Spvn",MPI_COMM_WORLD); // THIS IS WRONG FIX FIX FIX
proj0->init(ham);
// Only master tries to read
if(myComm->rank() == 0) {
if(hdf_read_file!=std::string("")) {
hdf_archive dump(myComm);
if(dump.open(hdf_read_file,H5F_ACC_RDONLY,false)) {
read_Spvn_from_file = hdf_read(dump,hdf_read_tag);
dump.close();
if(read_Spvn_from_file)
app_log()<<"Successfully read HS potentials from file: " <<hdf_read_file <<"\n";
}
} else {
if(dump_read.file_id != hdf_archive::is_closed) {
read_Spvn_from_file = hdf_read(dump_read,hdf_restart_tag);
if(read_Spvn_from_file)
app_log()<<"Successfully read HS potentials from restart file. \n";
else
while(dump_read.top() != hdf_archive::is_closed ) { dump_read.pop(); }
}
}
}
int success = read_Spvn_from_file?0:1;
myComm->bcast<int>(&success,1);
if(read_Spvn_from_file || !parallel_factorization) {
if(rank()==0) {
if(!read_Spvn_from_file) {
app_log()<<" Calculating HS potentials from scratch. \n";
Timer.reset("Generic1");
Timer.start("Generic1");
// calculates Hubbard-Stratonovich potentials (vn)
proj0->calculateHSPotentials(Spvn);
Timer.stop("Generic1");
app_log()<<" -- Time to calculate HS potentials: " <<Timer.average("Generic1") <<"\n";
}
std::vector<int> ni(3);
ni[0]=Spvn.size();
ni[1]=Spvn.rows();
ni[2]=Spvn.cols();
myComm->bcast(ni);
// do this later through a proper MPI object
myComm->bcast<RealType>(reinterpret_cast<RealType*>(Spvn.values()),2*Spvn.size(),MPI_COMM_HEAD_OF_NODES);
myComm->bcast<int>(Spvn.row_data(),Spvn.size(),MPI_COMM_HEAD_OF_NODES);
myComm->bcast<int>(Spvn.column_data(),Spvn.size(),MPI_COMM_HEAD_OF_NODES);
myComm->bcast<int>(Spvn.row_index(),Spvn.rows()+1,MPI_COMM_HEAD_OF_NODES);
myComm->barrier();
myComm->barrier();
} else {
std::vector<int> ni(3);
myComm->bcast(ni);
Spvn.setDims(ni[1],ni[2]);
if(head_of_nodes) {
Spvn.allocate_serial(ni[0]);
Spvn.resize_serial(ni[0]);
myComm->bcast<RealType>(reinterpret_cast<RealType*>(Spvn.values()),2*Spvn.size(),MPI_COMM_HEAD_OF_NODES);
myComm->bcast<int>(Spvn.row_data(),Spvn.size(),MPI_COMM_HEAD_OF_NODES);
myComm->bcast<int>(Spvn.column_data(),Spvn.size(),MPI_COMM_HEAD_OF_NODES);
myComm->bcast<int>(Spvn.row_index(),Spvn.rows()+1,MPI_COMM_HEAD_OF_NODES);
}
Spvn.setCompressed();
myComm->barrier();
if(!head_of_nodes) Spvn.initializeChildren();
myComm->barrier();
}
} else {
// calculating Spvn in parallel
APP_ABORT("Parallel calculation of Spvn not implemented\n");
}
// write restart if desired
if(rank()==0) {
if(hdf_write_file!=std::string("")) {
hdf_archive dump(myComm);
if(dump.create(hdf_write_file)) {
if(!hdf_write(dump,hdf_write_tag)) {
app_error()<<" Problems writing hdf5 file in VMCPropagator::setup(). \n";
return false;
}
dump.close();
} else {
app_error()<<" Problems opening hdf5 file in VMCPropagator::setup(). \n";
return false;
}
}
}
app_log()<<" -- Total number of terms in Cholesky vectors: " <<Spvn.size() <<std::endl;
// setup matrices
vHS.resize(2*NMO,NMO);
sigmaL.resize(Spvn.cols());
sigmaR.resize(Spvn.cols());
for(int i=0; i<sigmaR.size(); i++) sigmaR[i]=0;
for(int i=0; i<sigmaL.size(); i++) sigmaL[i]=0;
// setup temporary storage
T1.resize(NMO,NAEA);
T2.resize(NMO,NAEA);
SL.resize(2*NMO,NAEA);
SR.resize(2*NMO,NAEA);
return true;
}
// right now using local energy form of important sampling
// NOTE: the whole body is currently commented out, so this function does
// nothing and leaves all of its arguments untouched. The disabled code
// below is kept as written.
void VMCPropagator::Propagate(int n, WalkerHandlerBase* w, RealType& accept, const RealType dummy)
{
/*
int sz = NMO*NAEA;
std::copy(w.SlaterMat.begin(),w.SlaterMat.begin()+2*sz,SL.begin());
std::copy(w.SlaterMat.begin()+2*sz,w.SlaterMat.begin()+4*sz,SR.begin());
// 1. sample gaussian field
Timer.start("Propagate::sampleGaussianFields");
sampleGaussianFields(sigmaL);
sampleGaussianFields(sigmaR);
Timer.stop("Propagate::sampleGaussianFields");
// 3. generate and apply HS propagator (or a good approx of it)
// to the S1 and S2 Slater Determinants. The projected determinants are
// returned in S1 and S2.
Timer.start("Propagate::applyHSPropagator");
applyHSPropagator(SL.data(),sigmaL,6);
applyHSPropagator(SR.data(),sigmaR,6);
Timer.stop("Propagate::applyHSPropagator");
Timer.start("Propagate::ovlp");
ComplexType wgt;
SDetOps->green_function(SL,SR,wgt,T1,false);
Timer.stop("Propagate::ovlp");
if( (*rng)() < std::abs(wgt)/std::abs(w.weight) ) {
accept++;
std::copy(SL.begin(),SL.end(),w.SlaterMat.begin());
std::copy(SR.begin(),SR.end(),w.SlaterMat.begin()+2*sz);
w.weight = wgt;
}
*/
}
// Applies a truncated Taylor expansion of exp(vHS) to a Slater matrix in place.
//   SD    : two consecutive blocks of NMO*NAEA elements; the first is
//           multiplied by the first NMO*NMO block of vHS, the second by the
//           block starting at vHS.data()+NMO*NMO
//   sigma : auxiliary fields; vHS is built as Spvn * sigma
//   order : number of Taylor terms; a negative value selects
//           Order_Taylor_Expansion
void VMCPropagator::applyHSPropagator(ComplexType* SD, std::vector<ComplexType>& sigma, int order)
{
  if(order < 0)
    order = Order_Taylor_Expansion;

  ComplexType one = ComplexType(1.0,0.0);
  ComplexType zero = ComplexType(0.0,0.0);

  // vHS = Spvn * sigma, starting from a cleared vHS
  std::fill(vHS.begin(), vHS.end(), zero);

  Timer.start("build_vHS");
  SparseMatrixOperators::product_SpMatV(Spvn.rows(),Spvn.cols(),one,Spvn,sigma.data(),zero,vHS.data());
  Timer.stop("build_vHS");

  // calculate exp(vHS)*S through a Taylor expansion of exp(vHS)
  Timer.start("apply_expvHS_Ohmms");
  const int sz = NMO*NAEA;

  // First block: T1 holds the current Taylor term, (vHS^k / k!) * S, which
  // is added to SD after every multiplication.
  ComplexType* firstBlock = SD;
  std::copy(firstBlock, firstBlock+sz, T1.begin());
  for(int term=1; term<=order; ++term) {
    ComplexType fact = static_cast<ComplexType>(1.0/static_cast<double>(term));
    DenseMatrixOperators::product(NMO,NAEA,NMO,fact,vHS.data(),NMO,T1.data(),NAEA,zero,T2.data(),NAEA);
    T1 = T2;
    ComplexType* dst = firstBlock;
    for(ComplexMatrix::iterator src=T1.begin(); src!=T1.end(); ++src, ++dst)
      *dst += *src;
  }

  // if M1 == M2 on entry, no need to do this in spinRestricted case
  // Second block: same procedure with the second NMO*NMO block of vHS.
  ComplexType* secondBlock = SD+sz;
  std::copy(secondBlock, SD+2*sz, T1.begin());
  for(int term=1; term<=order; ++term) {
    ComplexType fact = static_cast<ComplexType>(1.0/static_cast<double>(term));
    DenseMatrixOperators::product(NMO,NAEB,NMO,fact,vHS.data()+NMO*NMO,NMO,T1.data(),NAEA,zero,T2.data(),NAEA);
    T1 = T2;
    ComplexType* dst = secondBlock;
    for(ComplexMatrix::iterator src=T1.begin(); src!=T1.end(); ++src, ++dst)
      *dst += *src;
  }
  Timer.stop("apply_expvHS_Ohmms");
}
void VMCPropagator::sampleGaussianFields(std::vector<ComplexType>& sigma)
{
int n = sigma.size();
for (int i=0; i+1<n; i+=2)
{
RealType temp1=1-0.9999999999*(*rng)(), temp2=(*rng)();
sigma[i] =ComplexType(std::sqrt(-2.0*std::log(temp1))*std::cos(6.283185306*temp2),0.0);
sigma[i+1]=ComplexType(std::sqrt(-2.0*std::log(temp1))*std::sin(6.283185306*temp2),0.0);
}
if (n%2==1)
{
RealType temp1=1-0.9999999999*(*rng)(), temp2=(*rng)();
sigma[n-1]=ComplexType(std::sqrt(-2.0*std::log(temp1))*std::cos(6.283185306*temp2),0.0);
}
}
}

View File

@ -0,0 +1,77 @@
#ifndef QMCPLUSPLUS_AFQMC_VMCPROPAGATOR_H
#define QMCPLUSPLUS_AFQMC_VMCPROPAGATOR_H
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "Utilities/RandomGenerator.h"
#include "AFQMC/Estimators/SlaterDetOperations.h"
#include "AFQMC/Hamiltonians/ProjectorBase.h"
namespace qmcplusplus
{
// Propagator used for VMC-style sampling. It owns the Hubbard-Stratonovich
// potentials (Spvn) produced by a projector and applies exp(vHS) to Slater
// matrices through a Taylor expansion.
class VMCPropagator: public PropagatorBase
{

  public:

  // c: communicator, r: random number generator (both forwarded to the base).
  // The pointer members start out null so that they never hold an
  // indeterminate value; proj0 is assigned in parse().
  VMCPropagator(Communicate *c, RandomGenerator_t* r):
    PropagatorBase(c,r),
    proj0(nullptr),
    use_eig(false),
    cutoff(1e-6),
    Sdet(nullptr)
  {
  }

  ~VMCPropagator() {}

  void Propagate(int n, WalkerHandlerBase*, RealType& E1, const RealType E2=0);

  // Reads the XML section, including the projector definition.
  bool parse(xmlNodePtr);

  // Builds or reads Spvn and sizes the work arrays.
  bool setup(std::vector<int>& TGdata, ComplexSMVector *v,HamiltonianBase*,WavefunctionHandler*, RealType dt, hdf_archive&, const std::string&,MPI_Comm tg_comm, MPI_Comm node_comm);

  // Write / read Spvn to / from an HDF5 archive.
  bool hdf_write(hdf_archive&, const std::string&);

  bool hdf_read(hdf_archive&,const std::string&);

  void benchmark() {}

  private:

  // projector created in parse().
  // NOTE(review): it is allocated with new and never released in this class.
  ProjectorBase* proj0;

  bool use_eig;

  double cutoff;

  // vHS = Spvn * sigma, rebuilt on every call to applyHSPropagator
  ComplexMatrix vHS;

  // Hubbard-Stratonovich potentials
  ComplexSMSpMat Spvn;

  SlaterDetOperations* Sdet;

  // local storage
  ComplexMatrix T1;
  ComplexMatrix T2;

  ComplexMatrix SL, SR;

  // auxiliary fields
  std::vector<ComplexType> sigmaL;
  std::vector<ComplexType> sigmaR;

  // Applies exp(vHS) to a Slater matrix; order < 0 selects the default order.
  void applyHSPropagator(ComplexType*, std::vector<ComplexType>& , int order=-1);

  // Fills the vector with random auxiliary fields.
  void sampleGaussianFields(std::vector<ComplexType>&);

};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,228 @@
#ifndef QMCPLUSPLUS_AFQMC_PHASELESS_WITHIMPSAMPLWITHELOC_FORCEBIAS
#define QMCPLUSPLUS_AFQMC_PHASELESS_WITHIMPSAMPLWITHELOC_FORCEBIAS
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "io/hdf_archive.h"
#include "OhmmsData/libxmldefs.h"
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
//#include "AFQMC/Walkers/SlaterDetWalker.h"
#include "AFQMC/Propagators/PropagatorBase.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
#include "AFQMC/Sandbox/compare_libraries.h"
namespace qmcplusplus
{
// Propagator derived from PropagatorBase. The heavy lifting (Propagate, setup,
// parse, hdf I/O, applyHSPropagator, ...) lives in the matching source file;
// this header declares the state and a few small inline bounding helpers.
class phaseless_ImpSamp_ForceBias: public PropagatorBase
{

  typedef phaseless_ImpSamp_ForceBias  thisClass;
  typedef phaseless_ImpSamp_ForceBias* thisClassPtr;

  public:

  // c and r are forwarded unchanged to PropagatorBase.
  // Initialisers are listed in member declaration order (the order in which
  // C++ actually runs them). The raw pointer members are explicitly nulled so
  // that use before setup() fails deterministically.
  // NOTE(review): cutoff, dEloc, H1_nalpha, GlobalSpvnSize, nCholVecs,
  // cvec0/cvecN and ik0/ikN/pik0/pikN are still not initialised here;
  // presumably parse()/setup() assign them -- confirm.
  phaseless_ImpSamp_ForceBias(Communicate *c, RandomGenerator_t* r)
    : PropagatorBase(c,r),
      hybrid_method(false),
      eloc_from_Spvn(false),
      imp_sampl(true),
      substractMF(true),
      use_eig(false),
      first(true),
      save_memory(false),
      test_library(0),
      Spvn_for_onebody(nullptr),
      wfn(nullptr),
      max_weight(100),
      vbias_bound(3.0),
      sizeOfG(0),
      apply_constrain(true)
  {
  }

  ~phaseless_ImpSamp_ForceBias() {}

//  void Propagate(int n, SlaterDetWalker&, RealType& E1, const RealType E2=0);

  void Propagate(int n, WalkerHandlerBase*, RealType& E1, const RealType E2=0);

  bool setup(std::vector<int>&,ComplexSMVector*,HamiltonianBase*,WavefunctionHandler*,RealType, hdf_archive&, const std::string&,MPI_Comm,MPI_Comm);

  bool parse(xmlNodePtr);

  bool hdf_write(hdf_archive&, const std::string&);

  bool hdf_read(hdf_archive&, const std::string&);

  // Currently a no-op: the library comparison below is commented out.
  void benchmark(){
    //PureSingleDeterminant* sd = dynamic_cast<PureSingleDeterminant*>(wfn->ImpSampWfn);
    //compare_libraries(NMO,NAEA,NAEB,Propg_H1,Propg_H1_indx,sd->Vijkl,vn,vn_indx);
  }

  private:

  std::ofstream out_debug;

  bool hybrid_method;

  bool eloc_from_Spvn;

  bool imp_sampl;

  bool substractMF;

  bool use_eig;

  bool first;

  bool save_memory;

  int test_library;

  std::ifstream in_rand;

  RealType cutoff;

  // one-body operator that defines H1
  // stored in sparse form
  std::vector<s2D<ValueType> > H1;
  IndexType H1_nalpha;

  // propagator for the one-body hamiltonian plus any other one-body term
  // from the HS transformation and/or mean-field removal
  std::vector<s2D<ComplexType> > Propg_H1;
  std::vector<IndexType> Propg_H1_indx;

  // HS potential: sum_n (sigma_n - vbias_n) * v2_n
  // This is stored full
  ComplexMatrix vHS;
  ComplexMatrix PHS;

  // potentials that represent the decomposition of Vijkl into a quadratic form
  // Vijkl = -0.5 * sum_n (v2_n)^2 + v0, where v0 is stored somewhere else
  // stored in sparse form and sequentially
  // vn_nterms is a vector that contains the number of non-zero terms (above cutoff)
  // in each term in vn. This allows for easier parsing.
  std::vector<s2D<ComplexType> > vn;
  std::vector<IndexType> vn_indx;
  Vector< Vector<s2D<ValueType> >::iterator > vn_bounds;

//  // new storage format for HS potentials
//  ComplexSpMat Spvn;
//  ComplexSpMat SpvnT;
//  // this is a pointer to either Spvn or SpvnT, to avoid extra logic in code
//  ComplexSpMat *Spvn_for_onebody;

  int GlobalSpvnSize;
  std::vector<int> nCholVec_per_node;

  ComplexSMSpMat Spvn;
  ComplexSMSpMat SpvnT;
  // this is a pointer to either Spvn or SpvnT, to avoid extra logic in code
  ComplexSMSpMat *Spvn_for_onebody;

  // storage for fields
  std::vector<RealType> sigma;

  std::vector<ComplexType> CV0;

  // Force bias potential, typically mixed potential but can also be MF potential
  std::vector<ComplexType> vbias;

  // Mean-field subtraction of vHS
  std::vector<ComplexType> vMF;

  // used to calculate mean fields, overlaps, local energies, etc
  WavefunctionHandler* wfn;

  // local storage
  ComplexMatrix T1;
  ComplexMatrix T2;

  ComplexMatrix S1;
  ComplexMatrix S2;

  ComplexSMVector local_buffer;

  std::vector<ComplexType> MFfactor;
  std::vector<ComplexType> hybrid_weight;

  // half-width of the window used by apply_bound_eloc
  RealType dEloc;

  // replacement value used by apply_bound_weight when |w| exceeds |max_weight|
  ComplexType max_weight;

  // magnitude cap used by the apply_bound_vbias overloads
  RealType vbias_bound;

  int sizeOfG;

  int nCholVecs;  // total number of Cholesky Vectors in the calculation
  int cvec0,cvecN; // index of first and last+1 Cholesky Vector of this core

  std::vector<int> walker_per_node;

  // ik breakup of Spvn
  IndexType ik0, ikN;   // minimum and maximum values of ik index in Spvn
  IndexType pik0, pikN; // locations of bounds of [ik0,ikN] sector in Spvn

  void dist_Propagate(WalkerHandlerBase*);

  bool apply_constrain;

  void applyHSPropagator(ComplexMatrix&, ComplexMatrix&, ComplexType& factor, int order=-1, bool calculatevHS=true);

  void addvHS(ComplexSMVector *buff, int nw, int sz, WalkerHandlerBase* wset);

  void sampleGaussianFields();

  void sampleGaussianFields(ComplexType*,int);

  // Maps (I,J) to the linear offset I*NMO+J; when J>=NMO the column is first
  // shifted down by NMO.
  inline IndexType Index2Mat(const IndexType I, const IndexType J) const {
    return (J<NMO)?(I*NMO+J):(I*NMO+J-NMO);
  }

  // Clamps the real part of e to [eshift-dEloc, eshift+dEloc].
  inline ComplexType apply_bound_eloc(const ComplexType e, const RealType eshift) const
  {
     // Leaving the imag part untouched, since it is not used.
     // Only stored in case it might be useful in the future.
     return ComplexType(std::max( std::min( e.real(), eshift+dEloc ), eshift-dEloc ),e.imag());
  }

  // Returns max_weight when |w| > |max_weight|, otherwise w unchanged.
  inline ComplexType apply_bound_weight(const ComplexType w ) const
  {
     return (std::abs(w)>std::abs(max_weight))?max_weight:w;
  }

  // Rescales each of the n entries of vec whose magnitude exceeds vbias_bound
  // so that its magnitude equals vbias_bound (the phase is kept).
  inline void apply_bound_vbias(ComplexType* vec, int n)
  {
    RealType mag=0.0;
    for(int i=0; i<n; i++,vec++) {
      mag = std::abs(*vec);
      if(mag > vbias_bound) (*vec)/=(mag/vbias_bound);
    }
  }

  // Same bounding applied in place to the vbias member.
  inline void apply_bound_vbias()
  {
    RealType mag=0.0;
    for(std::size_t i=0; i<vbias.size(); i++) {
      mag = std::abs(vbias[i]);
      if(mag > vbias_bound) vbias[i]/=(mag/vbias_bound);
    }
  }

  // Same bounding applied to a single value; returns the bounded value.
  inline ComplexType apply_bound_vbias(ComplexType v)
  {
    return (std::abs(v)>vbias_bound)?(v/(std::abs(v)/vbias_bound)):(v);
  }

  // Debug helper: prints every (row, column, value) triple of v to std::cout.
  void print_tuple(std::vector<s2D<ComplexType> >& v) {
    for(std::size_t i=0; i<v.size(); i++)
      std::cout<<"  -  " <<std::get<0>(v[i]) <<" " <<std::get<1>(v[i]) <<" " <<std::get<2>(v[i]) <<std::endl;
  }

  // Debug helper: writes the first NMO rows of M, one row per line, as
  // "complex(re,im)" entries.
  // NOTE(review): only NMO rows are written even if M has more -- confirm intent.
  void print_octave(std::ofstream& out, ComplexMatrix& M) {
    int nC = M.cols();
    for(int i=0; i<NMO; i++) {
      for(int j=0; j<nC; j++)
        out<<"complex(" <<M(i,j).real() <<"," <<M(i,j).imag() <<") ";
      out<<std::endl;
    }
  }

  void test_linear_algebra();

};
}
#endif

View File

@ -0,0 +1,496 @@
#include "AFQMC/config.h"
#include<cstdlib>
#include<algorithm>
#include<complex>
#include<iostream>
#include<fstream>
#include<map>
#include<utility>
#include<random>
#include "AFQMC/Sandbox/compare_libraries.h"
//#include "AFQMC/Numerics/DenseMatrixOperations.h"
//#include "AFQMC/Numerics/SparseMatrixOperations.h"
#if defined(HAVE_MKL)
#include "mkl.h"
#include "mkl_service.h"
#endif
namespace qmcplusplus
{
typedef std::vector<s2D<ValueType> >::iterator s2Dit;
// Benchmark / cross-check driver. It builds the same quantities (a dense "vHS"
// accumulated from the sparse vn terms, and a potential-energy-like contraction
// of Vuv with a reference density D0) through several code paths -- hand-written
// loops, cblas_zaxpyi, mkl_zcsrmv, mkl_cspblas_zcsrgemv and the ComplexSpMat
// class -- prints the accumulated difference against the hand-written result
// and the average time of `ntimes` repetitions to std::cout.
//
// The whole body is compiled only when HAVE_MKL is defined; otherwise the
// function does nothing. `Timer` is an object defined outside this function.
//
// Parameters as used below:
//   NMO, NAEA, NAEB       - sizes used to dimension the matrices and to place the
//                           unit entries of S0/D0/D0_mkl.
//   Propg_H1, Propg_H1_indx - (row, col, value) triples and offsets; only used to fill P0.
//   Vuv                   - (row, col, value) triples contracted with D0 / D0_mkl.
//   vn, vn_indx           - (row, col, value) triples; entries vn_indx[i]..vn_indx[i+1]-1
//                           belong to term i and are scaled by sigma[i].
void compare_libraries(int NMO, int NAEA, int NAEB, std::vector<s2D<ComplexType> >& Propg_H1, std::vector<IndexType>& Propg_H1_indx, std::vector<s2D<ValueType> >& Vuv, std::vector<s2D<ComplexType> >& vn, std::vector<IndexType>& vn_indx) {
#if defined(HAVE_MKL)
// number of timed repetitions of every kernel
int ntimes=10;
// One field per vn term. The generator is default-constructed, so the same
// sequence is produced on every call.
std::vector<ComplexType> sigma(vn_indx.size()-1);
std::default_random_engine generator;
std::normal_distribution<double> distribution(0.0,1.0);
for (int i=0; i<sigma.size(); ++i)
sigma[i] = static_cast<ComplexType>(distribution(generator));
// full matrices
// NOTE(review): V0 has (2*NMO*NMO)^2 entries and is only referenced inside the
// commented-out code below; P0 is filled here but not read again in this function.
ComplexMatrix P0(2*NMO,NMO), V0(2*NMO*NMO,2*NMO*NMO);
for(int i=Propg_H1_indx[0]; i<Propg_H1_indx[1]+Propg_H1_indx[0]; i++) {
IndexType a,b;
ComplexType v;
std::tie(a,b,v) = Propg_H1[i];
P0(a,b)=v;
}
/*
for(int i=0; i<2*NMO*NMO; i++)
for(int j=0; j<2*NMO*NMO; j++) V0(i,j)=ComplexType(0.0);
for(int i=0; i<Vuv.size(); i++) {
IndexType a,b;
ComplexType v;
std::tie(a,b,v) = Vuv[i];
V0(a,b)+=v;
}
*/
/*
for(int i=0; i<Vuv.size(); i++) {
IndexType ii,kk,jj,ll;
ii = a/NMO;
kk = a%NMO;
jj = b/NMO;
ll = b%NMO;
a = (ii*NMO)+ll;
b = (jj*NMO)+kk;
V0(a,b)-=v;
V2uv.push_back(std::forward_as_tuple(a,b,v));
}
*/
// Reference matrices with ones on the leading diagonal of the upper block
// (first NAEA entries) and of the lower block starting at row NMO (first NAEB entries).
// NOTE(review): S0 is not read again in this function.
ComplexMatrix S0(2*NMO,NAEA), D0(2*NMO,NMO), vHS(2*NMO,NMO);
for(int i=0; i<NAEA; i++) S0(i,i)=ComplexType(1.0);
for(int i=0; i<NAEA; i++) D0(i,i)=ComplexType(1.0);
for(int i=0; i<NAEB; i++) S0(i+NMO,i)=ComplexType(1.0);
for(int i=0; i<NAEB; i++) D0(i+NMO,i)=ComplexType(1.0);
ComplexType one(1.0,0.0);
ComplexType zero(0.0,0.0);
// Flat copy of the same pattern as D0, used as the dense vector in the MKL calls.
std::vector<ComplexType> D0_mkl(2*NMO*NMO);
for(int i=0; i<2*NMO*NMO; i++) D0_mkl[i] = zero;
for(int i=0; i<NAEA; i++) D0_mkl[i*NMO+i] = one;
for(int i=0; i<NAEB; i++) D0_mkl[(i+NMO)*NMO+i] = one;
// Reference contraction: sum over Vuv of D0[row] * D0[col] * value, with D0
// addressed through its linear iterator.
ComplexMatrix::iterator itG = D0.begin();
ComplexType epot_manual = 0;
s2Dit end2 = Vuv.end();
for(s2Dit it = Vuv.begin(); it != end2; it++)
epot_manual += (*(itG + std::get<0>(*it))) * (*(itG + std::get<1>(*it))) * std::get<2>(*it);
std::cout<<std::endl <<std::endl;
std::cout<<"******************************************************* \n";
std::cout<<" TESTING PERFORMANCE OF VARIOUS LINEAR ALGEBRA PACKAGES \n";
std::cout<<"******************************************************* \n";
std::cout<<std::endl <<std::endl;
std::cout<<" OhmmsPETE + mySparse(mkl): \n\n";
// Reference result: vHS_manual(r,c) = - sum_i sigma[i] * vn_i(r,c)
ComplexMatrix vHS_manual(2*NMO,NMO);
vHS_manual=ComplexType(0.0);
for(int i=0; i<vn_indx.size()-1; i++) {
ComplexType scl = sigma[i];
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++)
vHS_manual( std::get<0>( vn[n] ) , std::get<1>( vn[n] ) ) -= scl*std::get<2>( vn[n] );
}
// OhmmsPETE + mySparseMatrix
// Timed version of the loop above; zeroing vHS is outside the timed region.
std::string str;
str="vn_manual";
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
vHS=ComplexType(0.0);
Timer.start(str.c_str());
for(int i=0; i<vn_indx.size()-1; i++) {
ComplexType scl = sigma[i];
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++)
vHS( std::get<0>( vn[n] ) , std::get<1>( vn[n] ) ) -= scl*std::get<2>( vn[n] );
}
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
// Flatten vn into a linear index (row*NMO+col) array and a value array,
// keeping the original ordering of vn.
std::vector<int> vn_1dindx(vn.size());
std::vector<ComplexType> vn_1ddata(vn.size());
for(int i=0; i<vn_indx.size()-1; i++)
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++) {
vn_1dindx[n] = std::get<0>(vn[n])*NMO+std::get<1>(vn[n]);
vn_1ddata[n] = std::get<2>(vn[n]);
}
// switching sigma to -sigma
// (the kernels below add sigma*vn, whereas the manual loops subtract it;
//  sigma stays negated for the rest of the function)
for(int i=0; i<vn_indx.size()-1; i++)
sigma[i] *= ComplexType(-1.0);
// Same accumulation through cblas_zaxpyi, one call per non-empty term.
ComplexMatrix vHS_zaxpyi(2*NMO,NMO);
vHS_zaxpyi=ComplexType(0.0);
for(int i=0; i<vn_indx.size()-1; i++)
if(vn_indx[i+1] > vn_indx[i])
cblas_zaxpyi (vn_indx[i+1]-vn_indx[i], &(sigma[i]), &(vn_1ddata[vn_indx[i]]), &(vn_1dindx[vn_indx[i]]), vHS_zaxpyi.data());
std::cout<<"Magnitude of difference between vHS_manual and vHS_zaxpyi: \n";
// diff accumulates the sum of absolute element-wise differences
RealType diff = 0.0;
for(int i=0; i<2*NMO; i++)
for(int j=0; j<NMO; j++)
diff += std::abs(vHS_manual(i,j)-vHS_zaxpyi(i,j));
std::cout<<diff <<std::endl;
mkl_set_dynamic( 0 );
str="vn_mkl_cblas_zaxpyi 1 thr";
mkl_set_num_threads(1);
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
vHS=ComplexType(0.0);
Timer.start(str.c_str());
for(int i=0; i<vn_indx.size()-1; i++)
if(vn_indx[i+1] > vn_indx[i])
cblas_zaxpyi (vn_indx[i+1]-vn_indx[i], &(sigma[i]), &(vn_1ddata[vn_indx[i]]), &(vn_1dindx[vn_indx[i]]), vHS.data());
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
/*
str="vn_mkl_cblas_zaxpyi 2 thr";
mkl_set_num_threads(2);
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
vHS=ComplexType(0.0);
Timer.start(str.c_str());
for(int i=0; i<vn_indx.size()-1; i++)
if(vn_indx[i+1] > vn_indx[i])
cblas_zaxpyi (vn_indx[i+1]-vn_indx[i], &(sigma[i]), &(vn_1ddata[vn_indx[i]]), &(vn_1dindx[vn_indx[i]]), vHS.data());
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
str="vn_mkl_cblas_zaxpyi 4 thr";
mkl_set_num_threads(4);
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
vHS=ComplexType(0.0);
Timer.start(str.c_str());
for(int i=0; i<vn_indx.size()-1; i++)
if(vn_indx[i+1] > vn_indx[i])
cblas_zaxpyi (vn_indx[i+1]-vn_indx[i], &(sigma[i]), &(vn_1ddata[vn_indx[i]]), &(vn_1dindx[vn_indx[i]]), vHS.data());
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
str="vn_mkl_cblas_zaxpyi 8 thr";
mkl_set_num_threads(8);
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
vHS=ComplexType(0.0);
Timer.start(str.c_str());
for(int i=0; i<vn_indx.size()-1; i++)
if(vn_indx[i+1] > vn_indx[i])
cblas_zaxpyi (vn_indx[i+1]-vn_indx[i], &(sigma[i]), &(vn_1ddata[vn_indx[i]]), &(vn_1dindx[vn_indx[i]]), vHS.data());
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
str="vn_mkl_cblas_zaxpyi 16 thr";
mkl_set_num_threads(16);
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
vHS=ComplexType(0.0);
Timer.start(str.c_str());
for(int i=0; i<vn_indx.size()-1; i++)
if(vn_indx[i+1] > vn_indx[i])
cblas_zaxpyi (vn_indx[i+1]-vn_indx[i], &(sigma[i]), &(vn_1ddata[vn_indx[i]]), &(vn_1dindx[vn_indx[i]]), vHS.data());
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
*/
/*
ComplexMatrix vn_full(2*NMO*NMO,vn_indx.size()-1);
for(int i=0; i<vn_indx.size()-1; i++)
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++)
vn_full( std::get<0>( vn[n] )*NMO+std::get<1>( vn[n] ), i ) = std::get<2>( vn[n] );
std::vector<int> vn_1drows(2*NMO*NMO);
std::vector<ComplexType> vn_1ddata2(vn.size());
std::vector<ComplexType> sigma2(vn_indx.size()-1);
std::vector<ComplexType> vHS2(2*NMO*NMO);
for(int i=0, cnt=0; i<2*NMO*NMO; i++) {
vn_1drows[i]=cnt;
for(int j=0; j<vn_indx.size()-1; j++) {
if(std::abs(vn_full(i,j)) > 1e-8) {
vn_1dindx[cnt] = j;
vn_1ddata2[cnt].real = vn_full(i,j).real();
vn_1ddata2[cnt].imag = vn_full(i,j).imag();
}
}
}
*/
// Dense (linear index) x (term) copy of vn.
// NOTE(review): vn_full has NMO*NMO rows while the row index used is
// get<0>*NMO+get<1> and vHS_manual has 2*NMO rows; this presumably assumes all
// vn rows are < NMO -- confirm.
ComplexMatrix vn_full(NMO*NMO,vn_indx.size()-1);
for(int i=0; i<vn_indx.size()-1; i++)
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++)
vn_full( std::get<0>( vn[n] )*NMO+std::get<1>( vn[n] ), i ) = std::get<2>( vn[n] );
// Check vn_full * sigma against the first NMO rows of vHS_manual.
diff=0.0;
for(int i=0; i<NMO; i++) {
for(int k=0; k<NMO; k++) {
ComplexType t;
int ik = i*NMO+k;
for(int j=0; j<vn_indx.size()-1; j++) {
t += vn_full(ik,j)*sigma[j];
}
diff += std::abs( vHS_manual(i,k) - t );
}
}
std::cout<<"Magnitude of difference between vHS_manual and vHS_full: \n";
std::cout<<diff <<std::endl;
// Build row-pointer / column-index / value arrays from vn_full, keeping entries
// with magnitude above 1e-10. vn_1dindx is reused here for the column indices.
std::vector<int> vn_1drows(NMO*NMO+1);
std::vector<ComplexType> vn_1ddata2(vn.size());
std::vector<ComplexType> sigma2(vn_indx.size()-1);
std::vector<ComplexType> vHS2(2*NMO*NMO);
int cnt=0;
for(int i=0; i<NMO*NMO; i++) {
vn_1drows[i]=cnt;
for(int j=0; j<vn_indx.size()-1; j++) {
if(std::abs(vn_full(i,j)) > 1e-10) {
vn_1dindx[cnt] = j;
vn_1ddata2[cnt++] = vn_full(i,j);
}
}
}
// NOTE(review): the last row pointer is set to vn_1ddata2.size() (== vn.size()),
// not to cnt; the two differ if any entry was dropped by the 1e-10 threshold -- confirm.
vn_1drows.back() = vn_1ddata2.size();
for(int j=0; j<vn_indx.size()-1; j++)
sigma2[j] = sigma[j];
int nrows = vn_full.rows();
int ncols = vn_full.cols();
char trans = 'N';
for(int i=0; i<2*NMO*NMO; i++) vHS2[i] = 0.0;
// Second layout: one row per vn term, columns are linear (row*NMO+col) indices,
// i.e. the layout vn already has; used with trans='T' below.
// NOTE(review): vn_1drows3 has NMO*NMO+1 entries but only the first
// vn_indx.size()-1 and the last one are assigned here -- confirm this is intended.
std::vector<int> vn_1drows3(NMO*NMO+1);
std::vector<int> vn_1dindx3(vn.size());
std::vector<ComplexType> vn_1ddata3(vn.size());
for(int i=0; i<vn_indx.size()-1; i++) {
vn_1drows3[i] = vn_indx[i];
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++) {
vn_1ddata3[n] = std::get<2>( vn[n] );
vn_1dindx3[n] = std::get<0>( vn[n] )*NMO + std::get<1>( vn[n] );
}
}
vn_1drows3.back() = vn_1ddata3.size();
mkl_set_num_threads(1);
// Only elements 0 and 3 of the descriptor are assigned; the others are left unset.
char matdes[6];
matdes[0] = 'G';
matdes[3] = 'C';
str="vn_mkl_zcsrmv (transposed) 1 thr";
trans = 'T';
mkl_zcsrmv( &trans, &ncols, &nrows, &one, matdes, vn_1ddata3.data() , vn_1dindx3.data(), vn_1drows3.data() , &(vn_1drows3[1]), sigma2.data(), &zero, vHS2.data() );
std::cout<<"Magnitude of difference between vHS_manual and vHS_zcsrmv (in transposed form): \n";
// vHS2 is compared as a flat row-major image of vHS_manual (k runs over 2*NMO*NMO entries)
diff = 0.0;
for(int i=0, k=0; i<2*NMO; i++)
for(int j=0; j<NMO; j++,k++)
diff += std::abs(vHS_manual(i,j)-vHS2[k]);
std::cout<<diff <<std::endl;
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
Timer.start(str.c_str());
mkl_zcsrmv( &trans, &ncols, &nrows, &one, matdes, vn_1ddata3.data() , vn_1dindx3.data(), vn_1drows3.data() , &(vn_1drows3[1]), sigma2.data(), &zero, vHS2.data() );
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
// Same product using the arrays built from vn_full, without transposition.
str="vn_mkl_zcsrmv 1 thr";
trans = 'N';
mkl_zcsrmv( &trans, &nrows, &ncols, &one, matdes, vn_1ddata2.data() , vn_1dindx.data(), vn_1drows.data() , &(vn_1drows[1]), sigma2.data(), &zero, vHS2.data() );
std::cout<<"Magnitude of difference between vHS_manual and vHS_zcsrmv: \n";
diff = 0.0;
for(int i=0, k=0; i<2*NMO; i++)
for(int j=0; j<NMO; j++,k++)
diff += std::abs(vHS_manual(i,j)-vHS2[k]);
std::cout<<diff <<std::endl;
//mkl_set_num_threads(1);
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
Timer.start(str.c_str());
mkl_zcsrmv( &trans, &nrows, &ncols, &one, matdes, vn_1ddata2.data() , vn_1dindx.data(), vn_1drows.data() , &(vn_1drows[1]), sigma2.data(), &zero, vHS2.data() );
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
std::cout<<"\n\n\n**********************************************************\n";
std::cout<<" Testing local energy \n";
std::cout<<"**********************************************************\n";
// Timed version of the epot_manual contraction computed at the top.
str="epot_manual";
itG = D0.begin();
end2 = Vuv.end();
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
epot_manual = 0;
Timer.start(str.c_str());
for(s2Dit it = Vuv.begin(); it != end2; it++)
epot_manual += (*(itG + std::get<0>(*it))) * (*(itG + std::get<1>(*it))) * std::get<2>(*it);
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
// Re-purpose the vn_* work arrays to hold Vuv as row pointers / columns / values.
// The row-pointer fill below relies on Vuv being grouped by non-decreasing row
// index -- NOTE(review): confirm that callers guarantee this ordering.
vn_1ddata2.resize(Vuv.size());
vn_1dindx.resize(Vuv.size());
vn_1drows.resize(2*NMO*NMO+1);
vHS2.resize(2*NMO*NMO);
int curr=-1;
for(int n=0; n<Vuv.size(); n++) {
if( std::get<0>(Vuv[n]) != curr ) {
int old = curr;
curr = std::get<0>(Vuv[n]);
// every row from the previous one (exclusive) up to the new one starts at n
for(int i=old+1; i<=curr; i++) vn_1drows[i] = n;
}
vn_1ddata2[n] = std::get<2>(Vuv[n]);
vn_1dindx[n] = std::get<1>(Vuv[n]);
}
// rows after the last populated one are empty
for(int i=curr+1; i<vn_1drows.size(); i++)
vn_1drows[i] = vn_1ddata2.size();
// NOTE(review): one_int is not used anywhere below.
int one_int = 1;
trans = 'N';
nrows = ncols = 2*NMO*NMO;
matdes[0] = 'G';
matdes[3] = 'C';
// epot_mkl = D0_mkl . (Vuv * D0_mkl), with the matrix-vector product done by MKL
mkl_zcsrmv( &trans, &nrows, &ncols, &one, matdes, vn_1ddata2.data() , vn_1dindx.data(), vn_1drows.data() , &(vn_1drows[1]), D0_mkl.data(), &zero, vHS2.data() );
ComplexType epot_mkl = 0;
for(int i=0; i<2*NMO*NMO; i++) epot_mkl += vHS2[i] * D0_mkl[i];
std::cout<<"Difference between epot_manual and epot_mkl: " <<epot_manual-epot_mkl <<std::endl;
str="epot_mkl_zcsrmv";
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
epot_mkl = 0;
Timer.start(str.c_str());
mkl_zcsrmv( &trans, &nrows, &ncols, &one, matdes, vn_1ddata2.data() , vn_1dindx.data(), vn_1drows.data() , &(vn_1drows[1]), D0_mkl.data(), &zero, vHS2.data() );
for(int i=0; i<2*NMO*NMO; i++) epot_mkl += vHS2[i] * D0_mkl[i];
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
// Same contraction through mkl_cspblas_zcsrgemv.
mkl_cspblas_zcsrgemv (&trans, &nrows, vn_1ddata2.data() , vn_1drows.data(), vn_1dindx.data(), D0_mkl.data(), vHS2.data());
epot_mkl = 0;
for(int i=0; i<2*NMO*NMO; i++) epot_mkl += vHS2[i] * D0_mkl[i];
std::cout<<"Difference between epot_manual and epot_cspblas_zcsrgemv: " <<epot_manual-epot_mkl <<std::endl;
str="epot_mkl_cspblas_zcsrgemv";
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
epot_mkl = 0;
Timer.start(str.c_str());
mkl_cspblas_zcsrgemv (&trans, &nrows, vn_1ddata2.data() , vn_1drows.data(), vn_1dindx.data(), D0_mkl.data(), vHS2.data());
for(int i=0; i<2*NMO*NMO; i++) epot_mkl += vHS2[i] * D0_mkl[i];
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
/*
std::vector<std::complex<float> > vn_1ddata4(vn_1ddata2.size());
std::vector<std::complex<float> > vHS4(vHS2.size());
std::vector<std::complex<float> > D0_mkl_float(D0_mkl.size());
for(int i=0; i<vn_1ddata4.size(); i++) vn_1ddata4[i] = static_cast<std::complex<float> >(vn_1ddata2[i]);
for(int i=0; i<vHS4.size(); i++) vHS4[i] = 0;
for(int i=0; i<D0_mkl_float.size(); i++) D0_mkl_float[i] = static_cast<std::complex<float> >(D0_mkl[i]);
mkl_cspblas_ccsrgemv (&trans, &nrows, vn_1ddata4.data() , vn_1drows.data(), vn_1dindx.data(), D0_mkl_float.data(), vHS4.data());
std::complex<float> epot_mkl_float = 0;
for(int i=0; i<2*NMO*NMO; i++) epot_mkl_float += vHS4[i] * D0_mkl_float[i];
std::cout<<"Difference between epot_manual and epot_cspblas_zcsrgemv(float): " <<epot_manual-epot_mkl_float <<std::endl;
str="epot_mkl_cspblas_zcsrgemv(float)";
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
epot_mkl_float = 0;
Timer.start(str.c_str());
//mkl_cspblas_ccsrgemv (&trans, &nrows, vn_1ddata4.data() , vn_1drows.data(), vn_1dindx.data(), D0_mkl_float.data(), vHS4.data());
for(int i=0; i<2*NMO*NMO; i++) epot_mkl_float += vHS4[i] * D0_mkl_float[i];
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
*/
std::cout<<"\n\n\n**********************************************************\n";
std::cout<<" Testing SparseMatrix class \n";
std::cout<<"**********************************************************\n";
std::cout<<"\n\n\n**********************************************************\n";
std::cout<<" Testing potential energy \n";
std::cout<<"**********************************************************\n";
// Same contraction again, with the sparse arrays provided by ComplexSpMat
// (filled via add() and finalised with compress()).
ComplexSpMat SpVuv(2*NMO*NMO);
SpVuv.reserve(Vuv.size());
for(int n=0; n<Vuv.size(); n++) {
ComplexType t(std::get<2>(Vuv[n]));
SpVuv.add( std::get<0>(Vuv[n]) , std::get<1>(Vuv[n]), t);
}
SpVuv.compress();
mkl_cspblas_zcsrgemv (&trans, &nrows, SpVuv.values() , SpVuv.row_index(), SpVuv.column_data(), D0_mkl.data(), vHS2.data());
epot_mkl = 0;
for(int i=0; i<2*NMO*NMO; i++) epot_mkl += vHS2[i] * D0_mkl[i];
std::cout<<"Difference between epot_manual and epot_spmat_cspblas_zcsrgemv: " <<epot_manual-epot_mkl <<std::endl;
str="epot_spmat_mkl_cspblas_zcsrgemv";
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
epot_mkl = 0;
Timer.start(str.c_str());
mkl_cspblas_zcsrgemv (&trans, &nrows, SpVuv.values() , SpVuv.row_index(), SpVuv.column_data(), D0_mkl.data(), vHS2.data());
for(int i=0; i<2*NMO*NMO; i++) epot_mkl += vHS2[i] * D0_mkl[i];
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
/*
std::cout<<std::endl <<std::endl <<" EIGEN: \n\n";
std::vector<Eigen::SparseMatrix<ComplexType> > vn_eigen;
Eigen::Matrix<ComplexType,Eigen::Dynamic, Eigen::Dynamic> eig_vHS(2*NMO,NMO);
vn_eigen.resize(vn_indx.size()-1);
for(int i=0; i<vn_indx.size()-1; i++) {
vn_eigen[i].resize(2*NMO,NMO);
vn_eigen[i].reserve(vn_indx[i+1]-vn_indx[i]);
for(int n = vn_indx[i]; n<vn_indx[i+1]; n++)
vn_eigen[i].insert(std::get<0>( vn[n] ),std::get<1>( vn[n] )) = std::get<2>( vn[n] );
vn_eigen[i].makeCompressed();
}
str="vn_Eigen";
Timer.reset(str.c_str());
for(int nt=0; nt<ntimes; nt++) {
eig_vHS.setZero();
Timer.start(str.c_str());
for(int i=0; i<vn_eigen.size(); i++)
eig_vHS += sigma[i]*vn_eigen[i];
Timer.stop(str.c_str());
}
std::cout<<str <<": " <<Timer.average(str.c_str()) <<std::endl <<std::endl;
*/
#endif
}
}

View File

@ -0,0 +1,14 @@
#ifndef AFQMC_COMPARELIBRARIES
#define AFQMC_COMPARELIBRARIES
#include <vector>
#include "AFQMC/config.h"
namespace qmcplusplus
{
// Benchmark/cross-check driver for several sparse linear-algebra code paths.
// NMO, NAEA and NAEB are the sizes used to dimension the work matrices;
// Propg_H1/Vuv/vn are lists of (row, column, value) triples, with
// Propg_H1_indx and vn_indx holding offsets into Propg_H1 and vn.
// Results and timings are printed to std::cout; nothing is returned.
// The implementation is empty unless HAVE_MKL is defined.
void compare_libraries(int NMO, int NAEA, int NAEB, std::vector<s2D<ComplexType> >& Propg_H1, std::vector<IndexType>& Propg_H1_indx, std::vector<s2D<ValueType> >& Vuv, std::vector<s2D<ComplexType> >& vn, std::vector<IndexType>& vn_indx);
}
#endif

173
src/AFQMC/Utilities/SQCUtils.cpp Executable file
View File

@ -0,0 +1,173 @@
#include<cmath>
#include<cassert>
#include<iostream>
#include<cstdlib>
#include<string>
#include <bitset>
#include <sys/time.h>
#include <ctime>
#if defined(USE_MPI)
#include<mpi.h>
#endif
#include"Utilities/SQCUtils.h"
#include "sprng.h"
// Prints `text` to std::cerr, flushes both standard streams and terminates the
// program. With USE_MPI the whole MPI_COMM_WORLD job is aborted, otherwise
// exit() is called. An `id` of 0 is replaced by 1, so the termination code is
// never zero.
void SQCAbort(std::string text, int id) {
  std::cerr << text << std::endl;
  std::cerr.flush();
  std::cout.flush();
  // never report success from an abort
  const int code = (id != 0) ? id : 1;
#if defined(USE_MPI)
  MPI_Abort(MPI_COMM_WORLD, code);
#else
  exit(code);
#endif
}
// Reinterprets the sizeof(unsigned long int) bytes starting at arr[offset] as
// an unsigned long int (native byte order) and returns that value.
// The bytes are copied with memcpy, so arr+offset does not need to be aligned.
// The caller must guarantee that enough bytes are available after `offset`.
unsigned long int toUL(unsigned char *arr, int offset)
{
  unsigned long int value;
  const unsigned char* first = arr + offset;
  memcpy(&value, first, sizeof value);
  return value;
}
/*
#if defined(_LINUX_)
#include "sys/sysinfo.h"
inline size_t freemem()
{
struct sysinfo si;
sysinfo(&si);
si.freeram+=si.bufferram;
return si.freeram>>20;
}
#else
inline size_t freemem()
{
return 0;
}
#endif
*/
// Creates nt SPRNG streams for this rank.
//   np   - stored in npr; npr*nth is passed to init_sprng as the total stream count
//   nt   - stored in nth; number of streams allocated here
//   rank - stored in rk; stream t of this rank gets the global id nth*rk+t
//   seed - in/out: if 0 on entry it is replaced by make_sprng_seed() and the
//          generated value is visible to the caller afterwards
// NOTE(review): strm is allocated with new[] here; the matching release is not
// visible in this part of the file -- confirm it exists.
myRNG::myRNG(int np, int nt, int rank, int &seed):nth(nt),npr(np),rk(rank) {
  strm = new int*[nth];
  if (seed == 0) {
    seed = make_sprng_seed();
  }
  const auto total_streams = npr*nth;
  const auto first_stream  = nth*rk;
  for (int t = 0; t < nth; ++t) {
    strm[t] = init_sprng(first_stream+t, total_streams, seed, SPRNG_DEFAULT);
  }
}
/*
int myRNG::Irand(int n) {
return isprng(strm[n]);
}
double myRNG::Drand(int n) {
return sprng(strm[n]);
}
*/
// Returns the current gettimeofday() reading in seconds, as a double
// (whole seconds plus the microsecond part divided by 1e6).
double myTimer::getTime() {
  struct timeval now;
  gettimeofday(&now, NULL);
  const double whole    = double(now.tv_sec);
  const double fraction = double(now.tv_usec)/1000000.0;
  return whole + fraction;
}
// Zeroes the first 100 slots of the accumulated-time, call-count and
// start-time arrays and resets the tag counter handed out by getTag().
myTimer::myTimer()
{
  tag = 0;
  int slot = 0;
  while (slot < 100) {
    timeInt[slot] = 0.0;
    counter[slot] = 0;
    timeI[slot]   = 0;
    ++slot;
  }
}
// Returns the current tag and advances the internal counter by one, so
// successive calls yield 0, 1, 2, ... for a freshly constructed timer.
int myTimer::getTag() {
  return tag++;
}
/*
void myTimer::start(int n) {
// assert(n<100)
timeI[n] = getTime();
}
void myTimer::stop(int n) {
// assert(n<100)
double tm=getTime();
timeInt[n]+=(tm-timeI[n]);
counter[n]++;
}
double myTimer::average(int n) {
return timeInt[n]/double(counter[n]);
}
*/
// Clears slot n: start time, accumulated time and call count are set to zero.
// n is used as an index without any range check.
void myTimer::reset(int n) {
  counter[n] = 0;
  timeI[n]   = 0.0;
  timeInt[n] = timeI[n];
}
// Reads one line from `in` and splits it into tokens.
//
// Separators are space, tab, newline, '=' and ','; consecutive separators are
// collapsed, so no empty tokens are produced.
//
// Returns 10 when the line holds no token or its first token starts with '#'
// (this is also what happens when nothing could be read, e.g. at end of file);
// in that case text2 is left untouched. Otherwise text2 is replaced by the
// tokens of the line and 0 is returned.
//
// Positions are kept in std::string::size_type so that std::string::npos is
// handled explicitly instead of through int narrowing and wrap-around.
int parseLine(std::ifstream &in, std::vector<std::string> &text2)
{
  static const char* const separators = " \t\n=,";
  const std::string::size_type npos = std::string::npos;

  std::string text;
  std::getline(in, text);

  std::string::size_type first = text.find_first_not_of(separators);
  if (first == npos || text[first] == '#')
    return 10;

  text2.clear();
  while (first != npos)
  {
    // `last` is one past the end of the current token, or npos if the token
    // extends to the end of the line.
    const std::string::size_type last = text.find_first_of(separators, first);
    if (last == npos)
    {
      text2.push_back(text.substr(first));
      break;
    }
    text2.push_back(text.substr(first, last-first));
    first = text.find_first_not_of(separators, last);
  }
  return 0;
}
// Converts `value` to its textual representation in the given base (2..16),
// using lowercase digits. For any other base an empty string is returned.
// A leading '-' is emitted only for negative values in base 10; in other
// bases the digits of the magnitude are returned without a sign.
std::string itostr(int value, int base)
{
  static const char digits[] = "0123456789abcdef";
  const std::string::size_type kMaxDigits = 35;

  std::string out;
  out.reserve(kMaxDigits);
  if (base < 2 || base > 16)
    return out;

  // Digits come out least-significant first, so each one is put in front.
  int rest = value;
  do {
    out.insert(out.begin(), digits[std::abs(rest % base)]);
    rest /= base;
  } while (rest != 0);

  if (value < 0 && base == 10)
    out.insert(out.begin(), '-');
  return out;
}
// Returns a copy of str with every character passed through ::tolower.
// str itself is not modified.
//
// Each character is converted to unsigned char before the call: handing a
// negative plain char (any non-ASCII byte on platforms where char is signed)
// to tolower is undefined behaviour.
std::string mystring_lower(std::string &str) {
  std::string res = str;
  for (std::string::size_type i = 0; i < res.size(); ++i)
    res[i] = static_cast<char>(::tolower(static_cast<unsigned char>(res[i])));
  return res;
}

View File

@ -0,0 +1,260 @@
#include<AFQMC/Utilities/Utils.h>
#include <numeric>
#include<iostream>
namespace std{

  // Swaps the three objects referred to by tuple `a` with those referred to by
  // tuple `b`, element by element. The tuples themselves are const: only the
  // referenced int/int/ValueType objects are exchanged.
  // NOTE(review): this adds an overload to namespace std, presumably so that
  // standard algorithms can swap such reference tuples -- confirm it is still needed.
  void swap(std::tuple<int &, int &, qmcplusplus::ValueType &> const& a, std::tuple<int &, int &, qmcplusplus::ValueType &> const& b) {
    using std::swap;
    int& a_first  = std::get<0>(a);
    int& b_first  = std::get<0>(b);
    swap(a_first, b_first);
    int& a_second = std::get<1>(a);
    int& b_second = std::get<1>(b);
    swap(a_second, b_second);
    qmcplusplus::ValueType& a_value = std::get<2>(a);
    qmcplusplus::ValueType& b_value = std::get<2>(b);
    swap(a_value, b_value);
  }

}
namespace qmcplusplus {
// given a list of (N+1) integers, this routine attempts to find a partitioning of n continuous subsets
// such that the sum of elements in each set is approximately homogeneous
// In other words, the routine will minimize the variance of the difference between the sums in each set
// The number of elements in bucket i are given by indx[i+1]-indx[i]. In other words, tasks from indx[i] through indx[i+1]
// are assigned to bucket i. There are N buckets
void balance_partition_ordered_set(int N, int* indx, std::vector<int>& subsets)
{
int nsets = subsets.size()-1;
std::vector<int64_t> sums(nsets);
int64_t total,avg;
int64_t var,old_var;
auto get_var = [&] () {
int64_t var_=0;
for(std::vector<int64_t>::iterator it=sums.begin();
it<sums.end(); it++)
var_ += (*it-avg)*(*it-avg);
return var_;
};
// attempts to move subsets[i] to the right or the left if it reduces var
auto single_step_boundary = [&] (int i) {
int64_t dv1 = 0, dv2=0;
if( subsets[i]-1==subsets[i-1] ) {
// can't move left
if(subsets[i]+1==subsets[i+1]) return; // can't move right either
// try moving right
int64_t sm1_;
int64_t sm2_;
sm1_ = *(indx+subsets[i]+1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]+1);
dv1 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
while( dv1 <= 0 ) {
var += dv1;
subsets[i]++;
sums[i-1] = sm1_;
sums[i] = sm2_;
sm1_ = *(indx+subsets[i]+1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]+1);
dv1 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
}
} else {
if(subsets[i]+1==subsets[i+1]) {
// can only move left
int64_t sm1_;
int64_t sm2_;
sm1_ = *(indx+subsets[i]-1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]-1);
dv1 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
while( dv1 <= 0 ) {
var += dv1;
subsets[i]--;
sums[i-1] = sm1_;
sums[i] = sm2_;
sm1_ = *(indx+subsets[i]-1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]-1);
dv1 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
}
} else {
// can move either way
int64_t osm1 = sums[i-1], osm2 = sums[i], oset = subsets[i];
int64_t dvtot1 = 0, dvtot2 = 0;
// try moving left
int64_t sm1_;
int64_t sm2_;
sm1_ = *(indx+subsets[i]-1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]-1);
dv1 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
while( dv1 <= 0 ) {
dvtot1 += dv1;
subsets[i]--;
sums[i-1] = sm1_;
sums[i] = sm2_;
sm1_ = *(indx+subsets[i]-1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]-1);
dv1 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
}
//store
int64_t lsm1 = sums[i-1], lsm2 = sums[i], lset = subsets[i];
// restore
sums[i-1]=osm1;
sums[i]=osm2;
subsets[i]=oset;
// try moving right
sm1_ = *(indx+subsets[i]+1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]+1);
dv2 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
while( dv2 <= 0 ) {
dvtot2 += dv2;
subsets[i]++;
sums[i-1] = sm1_;
sums[i] = sm2_;
sm1_ = *(indx+subsets[i]+1) - *(indx+subsets[i-1]);
sm2_ = *(indx+subsets[i+1]) - *(indx+subsets[i]+1);
dv2 = (sm1_-avg)*(sm1_-avg) + (sm2_-avg)*(sm2_-avg)
- (sums[i]-avg)*(sums[i]-avg) - (sums[i-1]-avg)*(sums[i-1]-avg) ;
}
if(dvtot1 < dvtot2) {
sums[i-1]=lsm1;
sums[i]=lsm2;
subsets[i]=lset;
var += dvtot1;
} else
var += dvtot2;
return;
}
}
};
if(*(indx+N) == 0)
APP_ABORT("Error in PureSingleDeterminant::split_Ham_rows(): empty hamiltonian. \n");
// stupid algorithm right now
int i0=0;
int iN = N;
while( *(indx + i0) == *(indx + i0 + 1) ) i0++;
while( *(indx + iN - 1) == *(indx + iN) ) iN--;
int64_t avNpc = (iN-i0)/nsets;
int64_t extra = (iN-i0)%nsets;
for(int i=0; i<nsets; i++)
subsets[i]=( i<extra )?(i0+i*(avNpc+1)):(i0+i*avNpc+extra);
subsets[nsets]=iN;
for(int i=0; i<nsets; i++)
sums[i] = *(indx+subsets[i+1]) - *(indx+subsets[i]);
total = std::accumulate(sums.begin(),sums.end(),0);
avg = total/nsets;
//var=get_var();
do {
//old_var = var;
var=0;
for(int i=1; i<nsets; i++)
single_step_boundary(i);
} while( var < 0 );
//} while( std::abs(old_var-var) > 0 );
}
// careful
// FIX FIX FIX
// this routine returns interchanged (i,j)/(k,l), so it is wrong due to a std::complex conjugation for std::complex matrix elements
//
// Compares the orbital lists DL and DR. The first NAEA entries and the
// following NAEB entries are matched separately.
//
// Returns 2*cnt, where cnt is the number of entries of DL with no equal entry
// in the same sector of DR. The scan stops as soon as a third unmatched entry
// is found; 6 is returned and sg is 0 in that case.
//
// Outputs:
//   occ        - receives the matched entries, in the order they are found.
//   cnt == 1   - n1 is the unmatched entry of DL, n0 the first unmatched entry of DR.
//   cnt == 2   - n2, n3 are the unmatched entries of DL, n0, n1 those of DR.
//   sg         - +1.0 or -1.0 from the parity of the position offsets between
//                the unmatched entries (cnt 1 or 2), 0.0 otherwise.
//   n0..n3 not listed above keep the placeholder value 1000000.
//
// Side effect: matched entries of DL and DR are overwritten with the
// placeholder value 1000000, i.e. both input vectors are modified.
int cntExcitations(int NAEA, int NAEB, std::vector<IndexType>& DL, std::vector<IndexType>& DR, IndexType& n0, IndexType& n1, IndexType& n2, IndexType& n3, std::vector<IndexType>& occ, RealType& sg)
{
  std::vector<IndexType>::iterator itR = DR.begin();
  std::vector<IndexType>::iterator itL = DL.begin();
  sg = 0.0;
  int cnt=0, cnt2=0, nq=0;
  int ind[2];              // positions in DL of the (at most two) unmatched entries
  int dummy = 1000000;     // placeholder marking matched entries / unset outputs
  n0=n1=n2=n3=dummy;

  // Matches DL[first,last) against DR[first,last).
  // Returns false as soon as more than two unmatched entries have been seen.
  auto match_sector = [&] (int first, int last) -> bool {
    for(int i=first; i<last; i++) {
      bool matched = false;
      for(int j=first; j<last; j++) {
        if(*(itL+i) == *(itR+j)) {
          matched = true;
          occ[cnt2++] = *(itL+i);
          *(itL+i) = dummy;
          *(itR+j) = dummy;
          break;
        }
      }
      if(matched) continue;
      if(cnt<2) ind[cnt]=i;
      cnt++;
      if(cnt > 2) return false;
    }
    return true;
  };

  // first sector first, then second; the second scan is skipped if the first one gave up
  if( !match_sector(0,NAEA) || !match_sector(NAEA,NAEA+NAEB) ) {
    sg=0.0;
    return 2*cnt;
  }

  if(cnt == 1) {
    n1=static_cast<IndexType>(*(itL+ind[0]));
    for(int i=0; i<NAEA+NAEB; i++) {
      if(*(itR+i) != dummy) {   // there should be only one
        nq = ind[0]-i;
        n0=static_cast<IndexType>(*(itR+i));
        break;
      }
    }
    sg = (nq%2==0) ? 1.0 : -1.0;
  } else if(cnt == 2) {
    int iq1=-1,iq2=-1;
    n2=static_cast<IndexType>(*(itL+ind[0]));
    n3=static_cast<IndexType>(*(itL+ind[1]));
    // first unmatched entry of DR
    for(int i=0; i<NAEA+NAEB; i++) {
      if(*(itR+i) != dummy) {
        n0=static_cast<IndexType>(*(itR+i));
        iq1=i;
        break;
      }
    }
    // second unmatched entry of DR
    for(int i=iq1+1; i<NAEA+NAEB; i++) {
      if(*(itR+i) != dummy) {   // there should be only one
        n1=static_cast<IndexType>(*(itR+i));
        iq2=i;
        break;
      }
    }
    if(iq1<0 || iq2<0)
      APP_ABORT("Error in: cntExcitations.\n");
    nq = ind[0]-iq1+ind[1]-iq2;
    sg = (nq%2==0) ? 1.0 : -1.0;
  } else {
    sg=0.0;
  }
  return 2*cnt;
}
}

View File

@ -0,0 +1,33 @@
#ifndef QMCPLUSPLUS_AFQMC_UTILS_H
#define QMCPLUSPLUS_AFQMC_UTILS_H
#include<iostream>
#include<cstdlib>
#include<vector>
#include<string>
#include<AFQMC/config.h>
#include<AFQMC/config.0.h>
#include<Message/Communicate.h>
namespace qmcplusplus {
// Given a list of N integers, this routine attempts to find a partitioning into n contiguous subsets
// such that the sum of elements in each subset is approximately homogeneous.
// In other words, the routine tries to minimize the variance of the difference between the sums of the subsets.
// The number of elements in bucket i is given by indx[i+1]-indx[i]. In other words, tasks from indx[i] through indx[i+1]
// are assigned to bucket i.
void balance_partition_ordered_set(int N, int* indx, std::vector<int>& subsets);
// careful
// FIX FIX FIX
// This routine returns interchanged (i,j)/(k,l), so it is wrong for std::complex matrix elements
// (the result differs by a complex conjugation).
int cntExcitations(int NAEA, int NAEB, std::vector<IndexType>& DL, std::vector<IndexType>& DR, IndexType& n0, IndexType& n1, IndexType& n2, IndexType& n3, std::vector<IndexType>& occ, RealType& sg);
}
namespace std{
// Declaration only (the definition is not in this header): a swap overload taking two
// tuples of references (int&, int&, qmcplusplus::ValueType&) by const reference.
// NOTE(review): std::tuple is used here although <tuple> is not among this header's
// includes -- presumably it arrives transitively through one of them; confirm.
void swap(std::tuple<int &, int &, qmcplusplus::ValueType &> const& a, std::tuple<int &, int &, qmcplusplus::ValueType &> const& b);
}
#endif

153
src/AFQMC/Utilities/myTimer.h Executable file
View File

@ -0,0 +1,153 @@
#ifndef AFQMC_MYTIMER_H
#define AFQMC_MYTIMER_H
#include<tuple>
#include<vector>
#include<string>
#include<map>
#include<ctime>
#include <sys/time.h>
#include<cstdlib>
#include<ctype.h>
#include<algorithm>
#include<iostream>
#include<ostream>
// Collection of named wall-clock stopwatches.
// A timer is created either explicitly with add(), or implicitly by the first
// call to start() or reset() with a new name. Every other member function
// silently ignores names that were never registered (queries return 0.0,
// print_* functions write "Undefined Timer").
class myTimer {

  // Layout of one timer record:
  //   std::get<0>: number of start/stop intervals accumulated so far
  //   std::get<1>: wall-clock time stored by the latest call to start()
  //   std::get<2>: running sum of all accumulated intervals, in seconds
  typedef std::tuple<int,double,double> TimeData;
  typedef std::tuple<int,double,double>* TimeDataPtr;

  private:

  std::vector<TimeData > timer;          // records, in creation order
  std::map<std::string, int> id2pos;     // timer name -> index into `timer`

  // Current wall-clock time in seconds (gettimeofday, microsecond fields).
  double getTime() {
    struct timeval now;
    gettimeofday(&now, NULL);
    return double(now.tv_sec)+double(now.tv_usec)/1000000.0;
  }

  // Index of the record registered under str, or -1 when there is none.
  int getPos(const std::string& str) {
    std::map<std::string, int>::iterator found = id2pos.find(str);
    return (found==id2pos.end()) ? -1 : found->second;
  }

  // Appends a zeroed record, registers it under str and returns its index.
  int createTimer(const std::string& str) {
    timer.push_back(std::make_tuple(0,0.0,0.0));
    const int slot = static_cast<int>(timer.size()-1);
    id2pos[str] = slot;
    return slot;
  }

  // Mean interval length of record n; 0.0 while no interval has been accumulated.
  double meanInterval(int n) {
    const int calls = std::get<0>(timer[n]);
    if(calls == 0) return 0.0;
    return std::get<2>(timer[n])/static_cast<double>(calls);
  }

  public:

  myTimer()
  {
    timer.reserve(100);
  }

  // Registers a timer under str; does nothing when the name is already in use.
  void add(const std::string& str) {
    if(getPos(str) < 0) createTimer(str);
  }

  // Stores the current time as the start of a new interval (creating the timer if needed).
  void start(const std::string& str) {
    int slot = getPos(str);
    if(slot < 0) slot = createTimer(str);
    std::get<1>(timer[slot]) = getTime();
  }

  // Closes the current interval: bumps the interval count and adds the time
  // elapsed since the stored start time. The clock is read before the lookup.
  void stop(const std::string& str) {
    const double now = getTime();
    const int slot = getPos(str);
    if(slot < 0) return;
    std::get<0>(timer[slot])++;
    std::get<2>(timer[slot]) += (now-std::get<1>(timer[slot]));
  }

  // Seconds since the stored start time of the timer (0.0 for unknown names).
  double elapsed(const std::string& str) {
    const int slot = getPos(str);
    if(slot < 0) return 0.0;
    return getTime()-std::get<1>(timer[slot]);
  }

  // Mean length of the accumulated intervals (0.0 for unknown names or no intervals).
  double average(const std::string& str) {
    const int slot = getPos(str);
    if(slot < 0) return 0.0;
    return meanInterval(slot);
  }

  // Sum of the accumulated intervals (0.0 for unknown names).
  double total(const std::string& str ) {
    const int slot = getPos(str);
    if(slot < 0) return 0.0;
    return std::get<2>(timer[slot]);
  }

  // Zeroes count, start time and sum of the timer; creates the timer when it does not exist.
  void reset(const std::string& str) {
    const int slot = getPos(str);
    if(slot < 0) {
      createTimer(str);
      return;
    }
    std::get<0>(timer[slot])=0;
    std::get<1>(timer[slot])=std::get<2>(timer[slot])=0.0;
  }

  void print_elapsed(const std::string& str, std::ostream& out)
  {
    const int slot = getPos(str);
    out<<" Elapsed time in " <<str <<": ";
    if(slot >= 0)
      out<<getTime()-std::get<1>(timer[slot]) <<"\n";
    else
      out<<"Undefined Timer" <<"\n";
  }

  void print_average(const std::string& str, std::ostream& out)
  {
    const int slot = getPos(str);
    out<<" Average time in " <<str <<": ";
    if(slot >= 0)
      out<<meanInterval(slot) <<"\n";
    else
      out<<"Undefined Timer" <<"\n";
  }

  void print_total(const std::string& str, std::ostream& out)
  {
    const int slot = getPos(str);
    out<<" Total time in " <<str <<": ";
    if(slot >= 0)
      out<<std::get<2>(timer[slot]) <<"\n";
    else
      out<<"Undefined Timer" <<"\n";
  }

  // Writes the average of every registered timer, ordered by timer name.
  void print_average_all(std::ostream& out)
  {
    std::map<std::string,int>::iterator entry = id2pos.begin();
    while(entry != id2pos.end()) {
      out<<" Average time in " <<entry->first <<": " <<meanInterval(entry->second) <<"\n";
      ++entry;
    }
  }

};
#endif // myTimer

258
src/AFQMC/Utilities/readHeader.h Executable file
View File

@ -0,0 +1,258 @@
#ifndef AFQMC_READHEADER_H
#define AFQMC_READHEADER_H
#include<cstdlib>
#include<iostream>
#include<fstream>
#include<vector>
#include<string>
#include<ctype.h>
#include "Utilities/SimpleParser.h"
#include "AFQMC/config.h"
namespace qmcplusplus
{
bool readHeader( std::ifstream& in,
int& NMAX, int& NMO, int& NETOT, int& NAEA, int& NAEB, int& NCA, int& NCB, int& MS2, bool& spinRestricted, int& ISYM, std::vector<IndexType>& occup_alpha, std::vector<IndexType>& occup_beta, std::vector<IndexType>& orbSymm, std::vector<IndexType>& occupPerSymm_alpha, std::vector<IndexType>& occupPerSymm_beta, bool& orderStates, bool factorizedHam)
{
factorizedHam=false;
// Read header, but do not overwrite variables that are >= 0 (initialized in xml input).
std::vector<std::string> words;
getwords(words,in);
do {
if(words.size() == 0) {
app_error()<<"Format error in ASCII integral file. End of file in header. \n";
return false;
}
for(std::vector<std::string>::iterator it=words.begin(); it!=words.end(); it++) {
if(*it == "&FCI") {
// do nothing
} else if(*it == "NORB" || *it == "NMO") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NORB \n";
return false;
}
if(NMO < 0) NMO = atoi((++it)->c_str());
else it++;
} else if(*it == "NMAX") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NMAX \n";
return false;
}
if(NMAX < 0) NMAX = atoi((++it)->c_str());
else it++;
} else if(*it == "NAEA") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NAEA \n";
return false;
}
if(NAEA < 0) NAEA = atoi((++it)->c_str());
else it++;
} else if(*it == "NAEB") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NAEB \n";
return false;
}
if(NAEB < 0) NAEB = atoi((++it)->c_str());
else it++;
} else if(*it == "NCB") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NAEB \n";
return false;
}
if(NCB <= 0) NCB = atoi((++it)->c_str());
else it++;
} else if(*it == "NCA") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NAEB \n";
return false;
}
if(NCA <= 0) NCA = atoi((++it)->c_str());
else it++;
} else if(*it == "NELEC") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. NETOT \n";
return false;
}
if(NETOT < 0) NETOT = atoi((++it)->c_str());
else it++;
} else if(*it == "MS2") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. MS2 \n";
return false;
}
if(MS2 < -50) MS2 = atoi((++it)->c_str());
else it++;
} else if(*it == "ORBSYM") {
if( NMO < 0 ) {
app_error()<<"NMO (NORB) must be defined before ORBSYM in ASCII integral file.\n";
return false;
}
orbSymm.clear();
orbSymm.reserve(2*NMO);
int n = NMO; //spinRestricted?NMO:2*NMO;
while(orbSymm.size() < n) {
it++;
if(it==words.end()) {
getwords(words,in);
if(words.size() == 0)
app_error()<<"Format error in ASCII integral file. End of file in header. \n";
it=words.begin();
}
bool isNumber = true;
for(std::string::const_iterator k = it->begin(); k != it->end(); ++k)
isNumber = (isNumber&&isdigit(*k));
if(isNumber) {
orbSymm.push_back( atoi(it->c_str()) );
} else {
app_error()<<" Format error in section ORBSYM" <<std::endl;
app_error()<<" Expecting an integer, found: " <<*it <<std::endl;
app_error()<<" Number of terms found so far: " <<orbSymm.size() <<std::endl;
return false;
}
};
} else if(*it == "ISYM") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. ISYM \n";
return false;
}
if(ISYM < 0) ISYM = atoi((++it)->c_str());
else it++;
} else if(*it == "UHF" || *it == "IUHF") {
if( it+1 == words.end() ) {
app_error()<<"Format error in ASCII integral file. UHF \n";
return false;
}
int uhf = atoi((++it)->c_str());
spinRestricted = (uhf==0);
} else if(*it == "OCCUP_ALPHA") {
if( NAEA < 0 ) {
app_error()<<"NCA and NAEA must be defined before OCCUP_ALPHA in ASCII integral file.\n";
return false;
}
if( it+(NAEA) == words.end() ) {
app_error()<<"Format error in ASCII integral file. OCCUP_ALPHA \n";
return false;
}
occup_alpha.resize(NAEA);
it++;
for(int i=0; i<NAEA; i++,it++) occup_alpha[i] = atoi(it->c_str())-1;
std::sort(occup_alpha.begin(),occup_alpha.end());
} else if(*it == "OCCUP_BETA") {
if( NAEB < 0 ) {
app_error()<<"NCB and NAEB must be defined before OCCUP_ALPHA in ASCII integral file.\n";
return false;
}
if( it+(NAEB) == words.end() ) {
app_error()<<"Format error in ASCII integral file. OCCUP_BETA \n";
return false;
}
occup_beta.resize(NAEB);
it++;
for(int i=0; i<NAEB; i++,it++) occup_beta[i] = atoi(it->c_str())-1+NMO-NCA;
std::sort(occup_beta.begin(),occup_beta.end());
} else if(*it == "OCCUP") {
if( NAEB < 0 || NAEA < 0 || NAEA != NAEB || NCA!=NCB ) {
app_error()<<"OCCUP std::string in ASCII integral file requires NCA=NCB,NAEA=NAEB,NAEA>0,NAEB>0. \n" << std::endl;
return false;
}
if( words.size() < NAEA+1 ) {
app_error()<<"Format error in ASCII integral file. OCCUP \n" <<std::endl;
return false;
}
occup_alpha.resize(NAEA);
occup_beta.resize(NAEB);
for(int i=0; i<NAEA; i++) occup_alpha[i] = atoi((++it)->c_str())-1;
for(int i=0; i<NAEB; i++) occup_beta[i] = occup_alpha[i]+NMO-NCA;
std::sort(occup_beta.begin(),occup_beta.end());
std::sort(occup_alpha.begin(),occup_alpha.end());
} else if(*it == "OCCUPSYMM_ALPHA") {
if( NAEA < 0 ) {
app_error()<<"NCA and NAEA must be defined before OCCUPSYMM_ALPHA in ASCII integral file.\n";
return false;
}
occupPerSymm_alpha.clear();
for(;(it+1)!=words.end();it++)
occupPerSymm_alpha.push_back(atoi((it+1)->c_str()));
int cnt=0;
for(int i=0; i<occupPerSymm_alpha.size(); i++)
cnt+=occupPerSymm_alpha[i];
if(cnt != NCA+NAEA) {
app_error()<<" Problems with OCCUPSYMM_ALPHA. Number of orbitals does not add to NCA+NAEA. \n";
return false;
}
if(!orderStates) orderStates=true;
} else if(*it == "OCCUPSYMM_BETA") {
if( NAEB < 0 ) {
app_error()<<"NCB and NAEB must be defined before OCCUPSYMM_BETA in ASCII integral file.\n";
return false;
}
occupPerSymm_beta.clear();
for(;(it+1)!=words.end();it++)
occupPerSymm_beta.push_back(atoi((it+1)->c_str()));
int cnt=0;
for(int i=0; i<occupPerSymm_beta.size(); i++)
cnt+=occupPerSymm_beta[i];
if(cnt != NCB+NAEB) {
app_error()<<" Problems with OCCUPSYMM_BETA. Number of orbitals does not add to NCB+NAEB. \n";
return false;
}
if(!orderStates) orderStates=true;
} else if(*it == "OCCUPSYMM") {
if( NAEB < 0 || NAEA < 0 || NAEA != NAEB || NCA!=NCB ) {
app_error()<<"OCCUPSYMM std::string in ASCII integral file requires NCA=NCB,NAEA=NAEB,NAEA>0,NAEB>0. \n" << std::endl;
return false;
}
occupPerSymm_alpha.clear();
occupPerSymm_beta.clear();
for(;(it+1)!=words.end();it++) {
occupPerSymm_alpha.push_back(atoi((it+1)->c_str()));
occupPerSymm_beta.push_back(atoi((it+1)->c_str()));
}
int cnt=0;
for(int i=0; i<occupPerSymm_alpha.size(); i++)
cnt+=occupPerSymm_alpha[i];
if(cnt != NCA+NAEA) {
app_error()<<" Problems with OCCUPSYMM_ALPHA. Number of orbitals does not add to NCA+NAEA. \n";
return false;
}
cnt=0;
for(int i=0; i<occupPerSymm_beta.size(); i++)
cnt+=occupPerSymm_beta[i];
if(cnt != NCB+NAEB) {
app_error()<<" Problems with OCCUPSYMM_BETA. Number of orbitals does not add to NCB+NAEB. \n";
return false;
}
if(!orderStates) orderStates=true;
} else if(*it == "NPROP" || *it == "PROPBITLEN") {
break; // ignore the rest of the line
} else {
app_log()<<"Ignoring unknown tag in ASCII integral file: " <<*it <<std::endl;
}
}
getwords(words,in);
if(in.eof() && words.size() == 0)
app_error()<<"Format error in ASCII integral file. End of file in header. \n";
while(!in.eof() && words.size() == 0) {
if(in.eof())
app_error()<<"Format error in ASCII integral file. End of file in header. \n";
getwords(words,in);
}
// if(words.size() == 0)
// app_error()<<"Format error in ASCII integral file. End of file in header. \n";
} while( (words[0].find(std::string("/"))==std::string::npos && words[0].find(std::string("&END"))==std::string::npos));
if(NMAX < 0) NMAX = NMO;
return true;
}
}
#endif

View File

@ -0,0 +1,438 @@
#ifndef AFQMC_TASK_GROUP_H
#define AFQMC_TASK_GROUP_H
#include<vector>
#include<string>
#include<map>
#include<ctime>
#include<sys/time.h>
#include<cstdlib>
#include<ctype.h>
#include<algorithm>
#include<iostream>
#include<ostream>
#include <mpi.h>
#include"AFQMC/config.h"
#include<Message/MPIObjectBase.h>
#include<Message/CommOperators.h>
namespace qmcplusplus
{
// sets up communicators and task groups
// Various divisions are setup:
// 1. head_of_nodes: used for all shared memory setups
// 2. breaks global comm into groups of ncores_per_TG x nnodes_per_TG
// and sets up appropriate communicators
// Right now does not allow communications outside the TG.
// This option must be enabled in order to implement algorithms that
// need to calculate properties that involve all walkers, e.g. pure estimators
class TaskGroup: public MPIObjectBase, public AFQMCInfo {
public:
TaskGroup(Communicate *c, std::string name):MPIObjectBase(c),tgname(name),initialized(false),
verbose(true)
{}
~TaskGroup() {};
void setBuffer(ComplexSMVector* buf) { commBuff = buf; }
// right now using std::vector and std::string to make the initial implementatino
// easier, but this is not efficient and can lead to memory fragmentation for large
// processor counts (e.g. > 10k)
bool setup(int ncore, int nnode, bool print=false) {
verbose = print;
ncores_per_TG = ncore;
nnodes_per_TG = nnode;
app_log()<<std::endl
<<"**************************************************************\n"
<<" Setting up Task Group: " <<name <<std::endl;
// do setup based on hostname and rank on myComm
std::vector<char> names;
if(myComm->rank()==0) names.resize(myComm->size()*HOST_NAME_MAX);
std::vector<char> myname(HOST_NAME_MAX);
// get hostname
gethostname(myname.data(),HOST_NAME_MAX);
myComm->gather(myname,names,0);
std::vector<int> data(4);
if(myComm->rank() == 0) {
// check for consistency of split
std::vector< hostinfo > node_map;
node_map.push_back( hostinfo(myname.data(),0) );
for(int i=1; i<myComm->size(); i++) {
int k = look_for_match(node_map,names.data()+i*HOST_NAME_MAX);
if( k >= 0 ) {
node_map[k].cnt++;
} else {
node_map.push_back( hostinfo( names.data()+i*HOST_NAME_MAX,0) );
}
}
if( node_map.size()%nnodes_per_TG != 0 ) {
std::cerr<<"Found " <<node_map.size() <<" nodes. " <<std::endl;
APP_ABORT(" Error in TaskGroup setup(): Number of nodes found is not divisible by requested number of nodes per Task Group. \n");
}
int cpn, ntg;
cpn = node_map[0].cnt;
for(int i=1; i<node_map.size(); i++) {
if(node_map[i].cnt != cpn) {
app_error()<<" Error: Inconsistent number of cores in node: " <<i <<std::endl;
app_error()<<" Expected: " <<cpn+1 <<" Found: " <<node_map[i].cnt+1 <<std::endl;
APP_ABORT(" Error in TaskGroup::setup(): Found inconsistent number of cores in nodes. All nodes must have the same number of cores. \n\n\n");
}
}
cpn++;
if( cpn%ncores_per_TG != 0 ) {
std::cerr<<"Found " <<cpn <<" cores per node. " <<std::endl;
APP_ABORT(" Error in TaskGroup setup(): Number of cores per node found is not divisible by requested number of cores in Task Group. \n");
}
ntg = (node_map.size()/nnodes_per_TG) * (cpn/ncores_per_TG);
app_log()<<" Found: " <<node_map.size() <<" nodes, each with: " <<cpn <<" cores. " <<std::endl;
app_log()<<" Task Group named: " <<tgname <<" will be split in " <<ntg <<" groups. \n"
<<" Each group contains " <<nnodes_per_TG <<" nodes * " <<ncores_per_TG <<" cores/node " <<std::endl;
// reset cnter
for(int i=0; i<node_map.size(); i++) node_map[i].cnt = 0;
node_map[0].cnt++;
// assign keys
data[0] = 0; // node number: number of the node the current task belongs to
data[1] = 0; // core rank
data[2] = node_map.size(); // number of nodes
data[3] = cpn; // number of cores per node
for(int i=1; i<myComm->size(); i++) {
int k = look_for_match(node_map,names.data()+i*HOST_NAME_MAX);
if( k >= 0 ) {
data[0] = k;
data[1] = node_map[k].cnt++;
myComm->send(data.data(),4,i,1010,myComm->getMPI());
} else {
APP_ABORT(" Error in TaskGroup::setup(): This should not happen. \n\n\n ");
}
}
data[0] = data[1] = 0;
} else {
// receive key
MPI_Status st;
myComm->recv(data.data(),4,0,1010,myComm->getMPI(),&st);
}
tot_nodes = data[2];
tot_cores = data[3];
node_number = data[0];
core_number = data[1];
// split communicator
nrows = tot_cores/ncores_per_TG;
ncols = tot_nodes/nnodes_per_TG;
mycol = node_number/nnodes_per_TG;
node_in_TG = node_number%nnodes_per_TG;
myrow = core_number/ncores_per_TG;
TG_number = mycol + ncols*myrow;
myComm->split_comm(TG_number,MPI_COMM_TG);
MPI_Comm_rank(MPI_COMM_TG,&TG_rank);
MPI_Comm_size(MPI_COMM_TG,&TG_nproc);
// assign a unique number to each local group
int TG_number_local;
// TG_number_local = TG_number*nnodes_per_TG + node_in_TG;
// myComm->split_comm(TG_number_local,MPI_COMM_TG_LOCAL);
TG_root = false;
if(TG_rank==0) TG_root = true;
core_rank = core_number%ncores_per_TG;
core_root = (core_rank==0);
// setup list of roots in a TG, which are the only ones who communicate
ranks_of_core_roots.reserve(nnodes_per_TG);
next_core_root = prev_core_root = -1;
std::vector<int> tmp(2);
position_in_ranks_of_core_roots=-1;
for(int i=0; i<TG_nproc; i++) {
if( TG_rank == i ) tmp[0] = core_rank;
myComm->bcast(tmp.data(),1,i,MPI_COMM_TG);
if(tmp[0]==0) ranks_of_core_roots.push_back(i);
if( core_root && TG_rank == i ) position_in_ranks_of_core_roots = ranks_of_core_roots.size()-1;
}
if(core_root) {
// std::cout<<myComm->rank() <<" " <<position_in_ranks_of_core_roots <<" " <<ranks_of_core_roots[position_in_ranks_of_core_roots] <<std::endl;
if( position_in_ranks_of_core_roots < 0 || position_in_ranks_of_core_roots >= nnodes_per_TG ) {
std::cerr<<" TaskGroup::setup() position_in_ranks_of_core_roots: " <<position_in_ranks_of_core_roots <<std::endl;
APP_ABORT(" Logic error in TaskGroup::setup(). \n\n\n ");
}
if( ranks_of_core_roots[position_in_ranks_of_core_roots] != TG_rank ) {
std::cerr<<" TaskGroup::setup() ranks_of_core_roots[position_in_ranks_of_core_roots]: " <<ranks_of_core_roots[position_in_ranks_of_core_roots] <<std::endl;
APP_ABORT(" Logic error in TaskGroup::setup(). \n\n\n ");
}
if( position_in_ranks_of_core_roots == 0 ) {
next_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots+1];
prev_core_root = ranks_of_core_roots[nnodes_per_TG-1];
} else if( position_in_ranks_of_core_roots == nnodes_per_TG-1 ) {
next_core_root = ranks_of_core_roots[0];
prev_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots-1];
} else {
next_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots+1];
prev_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots-1];
}
}
app_log()<<"**************************************************************\n";
initialized=true;
return true;
}
// sets up new TG with global information from previously defined TG
bool quick_setup(int ncore, int nnode, int node_number, int core_number, int tot_nodes, int tot_cores , bool print=true ) {
// if(!initialized) {
// app_error()<<" Error: Call to TaskGroup::quick_setup in uninitialized state. \n";
// return false;
// }
verbose = print;
ncores_per_TG = ncore;
nnodes_per_TG = nnode;
app_log()<<std::endl
<<"**************************************************************\n"
<<" Setting up Task Group: " <<name <<std::endl;
if( tot_nodes%nnodes_per_TG != 0 ) {
APP_ABORT(" Error in TaskGroup::quick_setup(): Number of nodes is not divisible by requested number of nodes per Task Group. \n");
return false;
}
if( tot_cores%ncores_per_TG != 0 ) {
APP_ABORT(" Error in TaskGroup::quick_setup(): Number of cores per node is not divisible by requested number of cores in Task Group. \n");
return false;
}
// split communicator
nrows = tot_cores/ncores_per_TG;
ncols = tot_nodes/nnodes_per_TG;
mycol = node_number/nnodes_per_TG;
node_in_TG = node_number%nnodes_per_TG;
myrow = core_number/ncores_per_TG;
TG_number = mycol + ncols*myrow;
myComm->split_comm(TG_number,MPI_COMM_TG);
MPI_Comm_rank(MPI_COMM_TG,&TG_rank);
MPI_Comm_size(MPI_COMM_TG,&TG_nproc);
// assign a unique number to each local group
int TG_number_local;
// TG_number_local = TG_number*nnodes_per_TG + node_in_TG;
// myComm->split_comm(TG_number_local,MPI_COMM_TG_LOCAL);
TG_root = false;
if(TG_rank==0) TG_root = true;
core_rank = core_number%ncores_per_TG;
core_root = (core_rank==0);
if(verbose) {
app_log()<<" System contains " <<tot_nodes <<" nodes, each with: " <<tot_cores <<" cores. " <<std::endl;
app_log()<<" Task Group named: " <<tgname <<" will be split in " <<nrows*ncols <<" groups. \n"
<<" Each group contains " <<nnodes_per_TG <<" nodes * " <<ncores_per_TG <<" cores/node " <<std::endl;
}
// setup list of roots in a TG, which are the only ones who communicate
ranks_of_core_roots.reserve(nnodes_per_TG);
next_core_root = prev_core_root = -1;
std::vector<int> tmp(2);
position_in_ranks_of_core_roots=-1;
for(int i=0; i<TG_nproc; i++) {
if( TG_rank == i ) tmp[0] = core_rank;
myComm->bcast(tmp.data(),1,i,MPI_COMM_TG);
if(tmp[0]==0) ranks_of_core_roots.push_back(i);
if( core_root && TG_rank == i ) position_in_ranks_of_core_roots = ranks_of_core_roots.size()-1;
}
if(core_root) {
// std::cout<<myComm->rank() <<" " <<position_in_ranks_of_core_roots <<" " <<ranks_of_core_roots[position_in_ranks_of_core_roots] <<std::endl;
if( position_in_ranks_of_core_roots < 0 || position_in_ranks_of_core_roots >= nnodes_per_TG ) {
std::cerr<<" TaskGroup::setup() position_in_ranks_of_core_roots: " <<position_in_ranks_of_core_roots <<std::endl;
APP_ABORT(" Logic error in TaskGroup::setup(). \n\n\n ");
}
if( ranks_of_core_roots[position_in_ranks_of_core_roots] != TG_rank ) {
std::cerr<<" TaskGroup::setup() ranks_of_core_roots[position_in_ranks_of_core_roots]: " <<ranks_of_core_roots[position_in_ranks_of_core_roots] <<std::endl;
APP_ABORT(" Logic error in TaskGroup::setup(). \n\n\n ");
}
if( position_in_ranks_of_core_roots == 0 ) {
next_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots+1];
prev_core_root = ranks_of_core_roots[nnodes_per_TG-1];
} else if( position_in_ranks_of_core_roots == nnodes_per_TG-1 ) {
next_core_root = ranks_of_core_roots[0];
prev_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots-1];
} else {
next_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots+1];
prev_core_root = ranks_of_core_roots[position_in_ranks_of_core_roots-1];
}
}
app_log()<<"**************************************************************" <<std::endl;
initialized=true;
return true;
}
void set_min_max(int min, int max) {
min_index=min;
max_index=max;
}
void get_min_max(int& min, int& max) {
min=min_index;
max=max_index;
}
// over full TG using mpi communicator
void barrier() {
MPI_Barrier(MPI_COMM_TG);
}
// over local node using boost sync
void local_barrier() {
//commBuff->barrier();
MPI_Barrier(MPI_COMM_TG_LOCAL);
}
MPI_Comm getTGCOMM() { return MPI_COMM_TG; }
MPI_Comm getTGCommLocal() { return MPI_COMM_TG_LOCAL; }
void setTGCommLocal(MPI_Comm cm) { MPI_COMM_TG_LOCAL = cm; }
MPI_Comm getNodeCommLocal() { return MPI_COMM_NODE_LOCAL; }
void setNodeCommLocal(MPI_Comm cm) { MPI_COMM_NODE_LOCAL = cm; }
void allgather_TG(std::vector<int>& l, std::vector<int>& g) {
myComm->allgather(l,g,l.size(),MPI_COMM_TG);
}
// size is in units of ComplexType and represents (walker_size)*(number_of_walkers)
void resize_buffer(int& size)
{
std::vector<int> sz(1);
sz[0]=size;
myComm->gmax(sz,MPI_COMM_TG);
size = sz[0];
// reset SM is necessary
commBuff->resize(size);
}
// on entry, nblock has the number of blocks that should be sent
// on return, nblock has the number of blocks received
void rotate_buffer(int& nblock, int block_size)
{
int n0 = nblock;
if(commBuff->size() < nblock*block_size) {
APP_ABORT(" Error in TaskGroup::rotate_buffer(). Buffer size is too small. \n\n\n ");
}
commBuff->barrier();
if(core_root) {
local_buffer.resize(commBuff->size()); // this guarantees that I'll be able to receive any message
if(nnodes_per_TG%2 != 0) {
APP_ABORT("Error: TaskGroup::rotate_buffer curently implemented for an even number on nodes per task group. Aborting!!! \n\n\n");
}
// simple algorithm for now, make this efficient later
// this can be made much faster and efficient
if(position_in_ranks_of_core_roots%2==0) {
MPI_Status status;
myComm->send(commBuff->values(),n0*block_size,next_core_root,1001,MPI_COMM_TG);
myComm->recv(local_buffer.data(),local_buffer.size(),prev_core_root,1002,MPI_COMM_TG,&status);
// assuming doubles for now, FIX FIX FIX
MPI_Get_count(&status,MPI_DOUBLE,&nblock);
nblock = nblock/2/block_size; // since I'm communicating std::complex
std::copy(local_buffer.begin(),local_buffer.begin()+nblock*block_size,commBuff->begin());
} else {
MPI_Status status;
myComm->recv(local_buffer.data(),local_buffer.size(),prev_core_root,1001,MPI_COMM_TG,&status);
// assuming doubles for now, FIX FIX FIX
MPI_Get_count(&status,MPI_DOUBLE,&nblock);
nblock = nblock/2/block_size; // since I'm communicating std::complex
myComm->send(commBuff->values(),n0*block_size,next_core_root,1002,MPI_COMM_TG);
std::copy(local_buffer.begin(),local_buffer.begin()+nblock*block_size,commBuff->begin());
}
}
commBuff->share(&nblock,1,core_root);
}
int getCoreRank() { return core_rank; }
int getLocalNodeNumber() { return node_in_TG; }
int getTGNumber() { return TG_number; }
int getTGRank() { return TG_rank; }
int getNCoresPerTG() { return ncores_per_TG; }
int getNNodesPerTG() { return nnodes_per_TG; }
void getRanksOfRoots(std::vector<int>& ranks, int& pos ) {
ranks=ranks_of_core_roots;
pos=position_in_ranks_of_core_roots;
}
void getSetupInfo(std::vector<int>& data)
{
data.resize(5);
data[0]=node_number;
data[1]=core_number;
data[2]=tot_nodes;
data[3]=tot_cores;
data[4]=ncores_per_TG;
}
// must be setup externally to be able to reuse between different TG
ComplexSMVector* commBuff;
std::string tgname;
bool verbose;
bool initialized;
int node_number, core_number, tot_nodes, tot_cores;
int TG_number;
// TGs are defined in a 2-D framwork. Rows correspond to different groups in a node
// Cols correspond to division of nodes into groups. Every MPI task belongs to a specific
// TG given by the myrow and mycol integer.
int myrow, mycol, nrows, ncols, node_in_TG;
bool TG_root; // over full TG
int TG_rank, TG_nproc; // over full TG, notice that nproc = ncores_per_TG * nnodes_per_TG
bool core_root; // over local node
int core_rank; // over local node
std::vector<int> ranks_of_core_roots;
int position_in_ranks_of_core_roots;
int next_core_root, prev_core_root; // only meaningful at core_root processes
MPI_Comm MPI_COMM_TG; // Communicator over all cores in a given TG
MPI_Comm MPI_COMM_TG_LOCAL; // Communicator over all cores in a given TG that reside in the given node
MPI_Comm MPI_COMM_NODE_LOCAL; // Communicator over all cores of a node. Must be created externally. Same above
std::vector<ComplexType> local_buffer;
int ncores_per_TG; // total number of cores in all nodes must be a multiple
int nnodes_per_TG; // total number of nodes in communicator must be a multiple
int min_index, max_index;
struct hostinfo {
char id[HOST_NAME_MAX];
int cnt;
hostinfo( char* name, int n) {
std::memcpy(id,name,HOST_NAME_MAX*sizeof(char));
cnt=n;
}
};
int look_for_match(std::vector<hostinfo>& v, char* n) {
for(int i=0; i<v.size(); i++) {
if( std::strcmp(v[i].id,n)==0 )
return i;
}
return -1;
}
};
}
#endif

View File

@ -0,0 +1,140 @@
#ifndef AFQMC_TUPLE_ITERATORS
#define AFQMC_TUPLE_ITERATORS
#include<tuple>
#include<cstdlib>
#include<functional>
#include <algorithm>
#include<boost/iterator/iterator_facade.hpp>
#include<boost/iterator/iterator_traits.hpp>
//#include"AFQMC/Utilities/Utils.h"
namespace qmcplusplus{
// Random-access iterator that walks two sequences in lock step.
// Dereferencing yields R, by default a std::pair of references to the current
// elements of both sequences, so that assigning through the pair writes into
// the underlying sequences.
//   FirstIter, SecondIter : iterator types of the two sequences
//   R                     : type returned by operator*
//   D                     : difference type (taken from FirstIter by default)
template<
  class FirstIter, class SecondIter,
  typename R = std::pair<typename std::iterator_traits<FirstIter>::value_type&, typename std::iterator_traits<SecondIter>::value_type&>,
  typename D = typename std::iterator_traits<FirstIter>::difference_type
>
class paired_iterator :
  public boost::iterator_facade<
    // the facade must be given the complete derived type, including R and D;
    // otherwise it refers to a different class whenever R or D is not the default
    paired_iterator<FirstIter, SecondIter, R, D>,
    std::pair<
      typename std::iterator_traits<FirstIter>::value_type,
      typename std::iterator_traits<SecondIter>::value_type
    >,
    std::random_access_iterator_tag,
    R,
    D
  >
{
  public:
  FirstIter first;
  SecondIter second;
  paired_iterator(FirstIter first_, SecondIter second_) : first(first_), second(second_){}
  using difference_type = D;
  using reference = R;
  private:
  // hooks used by boost::iterator_facade to synthesize the iterator operators
  friend class boost::iterator_core_access;
  void increment(){++first; ++second;}
  void decrement(){--first; --second;}
  bool equal(paired_iterator const& other) const{
    return first == other.first and second == other.second;
  }
  reference dereference() const{return {*first, *second};}
  void advance(difference_type n){first += n; second += n;}
  // distances are measured on the first sequence only
  difference_type distance_to(paired_iterator const& other) const{
    return other.first - first;
  }
};
// Random-access iterator that walks three sequences in lock step.
// Dereferencing yields R, by default a std::tuple of references to the current
// elements of the three sequences.
//   FirstIter, SecondIter, ThirdIter : iterator types of the three sequences
//   R                                : type returned by operator*
//   D                                : difference type (taken from FirstIter by default)
template<
  class FirstIter, class SecondIter, class ThirdIter,
  typename R = std::tuple<typename std::iterator_traits<FirstIter>::value_type&,
                          typename std::iterator_traits<SecondIter>::value_type&,
                          typename std::iterator_traits<ThirdIter>::value_type&>,
  typename D = typename std::iterator_traits<FirstIter>::difference_type
>
class tuple_iterator :
  public boost::iterator_facade<
    // the facade must be given the complete derived type, including R and D;
    // otherwise it refers to a different class whenever R or D is not the default
    tuple_iterator<FirstIter, SecondIter, ThirdIter, R, D>,
    std::tuple<
      typename std::iterator_traits<FirstIter>::value_type,
      typename std::iterator_traits<SecondIter>::value_type,
      typename std::iterator_traits<ThirdIter>::value_type
    >,
    std::random_access_iterator_tag,
    R,
    D
  >
{
  public:
  FirstIter first;
  SecondIter second;
  ThirdIter third;
  tuple_iterator(FirstIter first_, SecondIter second_, ThirdIter third_) :
    first(first_), second(second_),third(third_){}
  using difference_type = D;
  using reference = R;
  private:
  // hooks used by boost::iterator_facade to synthesize the iterator operators
  friend class boost::iterator_core_access;
  void increment(){++first; ++second; ++third;}
  void decrement(){--first; --second; --third;}
  // is this correct for what I want
  // (two iterators are equal only when all three component iterators are equal)
  bool equal(tuple_iterator const& other) const{
    return first == other.first and second == other.second and third == other.third;
  }
  // std::ref keeps make_tuple from copying the elements
  reference dereference() const
  {return std::make_tuple(std::ref(*first), std::ref(*second), std::ref(*third));}
  void advance(difference_type n){first += n; second += n; third += n;}
  // distances are measured on the first sequence only
  difference_type distance_to(tuple_iterator const& other) const{
    return other.first - first;
  }
};
// Convenience factory: deduces the iterator types and returns the paired_iterator
// positioned at (ci, vi).
template <class FirstIter, class SecondIter>
paired_iterator<FirstIter, SecondIter>
make_paired_iterator(FirstIter ci, SecondIter vi)
{
  return paired_iterator<FirstIter, SecondIter>(ci, vi);
}
// Convenience factory: deduces the iterator types and returns the tuple_iterator
// positioned at (ci, vi, ti).
template <class FirstIter, class SecondIter, class ThirdIter>
tuple_iterator<FirstIter, SecondIter, ThirdIter>
make_tuple_iterator(FirstIter ci, SecondIter vi, ThirdIter ti)
{
  return tuple_iterator<FirstIter, SecondIter, ThirdIter>(ci, vi, ti);
}
}
namespace std{
// iter_swap for paired_iterator: exchanges the elements the two iterators point at,
// component by component, directly in the underlying sequences.
template<class It1, class It2>
void iter_swap(qmcplusplus::paired_iterator<It1, It2> const& a, qmcplusplus::paired_iterator<It1, It2> const& b)
{
  using std::swap;
  // operator* of these iterators returns references to *first and *second,
  // so the component iterators can be dereferenced directly
  swap(*a.first, *b.first);
  swap(*a.second, *b.second);
}
template<class It1, class It2, class It3>
void iter_swap(qmcplusplus::tuple_iterator<It1, It2, It3> const& a, qmcplusplus::tuple_iterator<It1, It2, It3> const& b){
using std::swap;
swap(std::get<0>(*a), std::get<0>(*b));
swap(std::get<1>(*a), std::get<1>(*b));
swap(std::get<2>(*a), std::get<2>(*b));
}
}
#endif

View File

@ -0,0 +1,704 @@
#include<cassert>
#include<random>
#include<cstdlib>
#if defined(HAVE_MPI)
#include<mpi.h>
#endif
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "Message/CommOperators.h"
#include"AFQMC/Walkers/SlaterDetWalker.h"
#include"AFQMC/Walkers/DistWalkerHandler.h"
namespace qmcplusplus
{
// Placeholder: performs no work and always reports success.
bool DistWalkerHandler::restartFromXML()
{
return true;
}
// Placeholder: performs no work and always reports success.
bool DistWalkerHandler::dumpToXML()
{
return true;
}
// not thinking about efficiency since this is only used once during initialization
// Currently a stub: the whole restart implementation below is commented out, so
// nothing is read from the archive, none of the arguments are used, and the
// function always returns true.
// The disabled code reads the walker count and per-walker size from the archive on
// rank 0, splits the walkers over the ranks and distributes them with scatterv.
bool DistWalkerHandler::restartFromHDF5(int n, hdf_archive& read, const std::string& tag, bool set_to_target)
{
/*
// return initWalkers(n);
int nproc = myComm->size();
Walker dummy;
dummy.initWalker(HFMat,true);
int sz = dummy.sizeForDump();
std::vector<int> from(nproc);
int cnt,nWtot = 0;
std::vector<char> buffer;
std::vector<char> bufferall;
std::vector<int> counts(nproc), displ(nproc);
if(myComm->rank()==0) {
std::vector<int> Idata(2);
std::string path = "/Walkers/DistWalkerHandler_SlaterDetWalker";
if(tag != std::string("")) path += std::string("/")+tag;
if(!read.is_group( path )) {
app_error()<<" ERROR: H5Group could not find /Walkers/DistWalkerHandler_SlaterDetWalker/{tag} group in file. No restart data for walkers. \n";
return false;
}
if(!read.push("Walkers")) return false;
if(!read.push("DistWalkerHandler_SlaterDetWalker")) return false;
if(tag != std::string("")) if(!read.push(tag)) return false;
if(!read.read(Idata,"DistWalkerHandler_dims")) return false;
nWtot=Idata[0];
app_log()<<"Found " <<nWtot <<" walkers on restart file." <<std::endl;
if(Idata[1] != sz) {
app_error()<<" ERROR: Size of walker is not consistent in hdf5 file. \n";
app_error()<<" sz_sim, sz_file: " <<sz <<" " <<Idata[1] <<std::endl;
return false;
}
bufferall.resize(nWtot*sz);
if(!read.read(bufferall,"DistWalkerHandler_walkers")) return false;
if(tag != std::string("")) read.pop();
read.pop();
read.pop();
int nWperProc = nWtot/nproc;
int nWextra = nWtot%nproc;
if( set_to_target && nWperProc >= n ) {
nWperProc = n;
nWextra = 0;
}
for(int i=0; i<nWextra; i++) from[i] = nWperProc+1;
for(int i=nWextra; i<nproc; i++) from[i] = nWperProc;
myComm->bcast(from);
int nW = from[myComm->rank()];
walkers.clear();
if(set_to_target) walkers.resize(n);
else walkers.resize(nW);
cnt=0;
for(WalkerIterator it=walkers.begin(); it!=walkers.begin()+nW; it++) {
it->initWalker(HFMat);
it->restartFromChar( bufferall.data()+cnt );
cnt+=sz;
}
cnt=0;
for(WalkerIterator it=walkers.begin()+nW; it!=walkers.end(); it++) {
it->initWalker(HFMat);
*it = walkers[cnt];
cnt++;
if(cnt == nW) cnt=0;
}
displ[0]=0;
displ[1]=0;
counts[0]=0;
for(int i=1; i<nproc-1; i++) {
counts[i] = from[i]*sz;
displ[i+1] = displ[i]+counts[i];
}
counts[nproc-1] = from[nproc-1]*sz;
myComm->scatterv( bufferall, buffer, counts, displ, 0);
} else {
myComm->bcast(from);
int nW = from[myComm->rank()];
buffer.resize(nW*sz);
myComm->scatterv( bufferall, buffer, counts, displ, 0);
walkers.clear();
if(set_to_target) walkers.resize(n);
else walkers.resize(nW);
cnt=0;
for(WalkerIterator it=walkers.begin(); it!=walkers.begin()+nW; it++) {
it->initWalker(HFMat);
it->restartFromChar( buffer.data()+cnt );
cnt+=sz;
}
cnt=0;
for(WalkerIterator it=walkers.begin()+nW; it!=walkers.end(); it++) {
it->initWalker(HFMat);
*it = walkers[cnt];
cnt++;
if(cnt == nW) cnt=0;
}
}
*/
return true;
}
// Currently a stub: the whole dump implementation below is commented out, so
// nothing is written to the archive, neither argument is used, and the function
// always returns true.
// The disabled code gathers every rank's walkers on rank 0 with gatherv and writes
// them, together with the walker count and per-walker size, under
// /Walkers/DistWalkerHandler_SlaterDetWalker[/tag].
bool DistWalkerHandler::dumpToHDF5(hdf_archive& dump, const std::string& tag)
{
/*
// check that restart data does not exist
std::string path = "/Walkers/DistWalkerHandler_SlaterDetWalker";
if(tag != std::string("")) path += std::string("/")+tag;
if(dump.is_group( path )) {
app_error()<<" ERROR: H5Group /Walkers/DistWalkerHandler_SlaterDetWalker/{tag} already exists in restart file. This is a bug and should not happen. Contact a developer.\n";
return false;
}
// doing one big gatherV, not sure if this is the best way right now
int nW = numWalkers();
int sz = walkers[0].sizeForDump();
std::vector<int> to(1,nW);
std::vector<int> from(myComm->size());
int nWtot = 0;
myComm->allgather(to,from,1);
for(int i=0; i<myComm->size(); i++) nWtot += from[i];
std::vector<char> buffer(nW*sz);
std::vector<char> bufferall;
if(myComm->rank() == 0)
bufferall.resize(nWtot*sz);
int cnt=0;
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); it++)
if(it->alive) {
it->dumpToChar( buffer.data()+cnt );
cnt+=sz;
}
std::vector<int> displ(myComm->size());
for(int i=0; i<myComm->size(); i++) from[i]*=sz;
cnt=0;
for(int i=0; i<myComm->size(); i++) {
displ[i] = cnt;
cnt+=from[i];
}
myComm->gatherv(buffer,bufferall,from,displ,0);
if(myComm->rank()==0) {
// now write to HDF5 file
std::vector<int> Idata(2);
Idata[0]=nWtot;
Idata[1]=sz;
dump.push("Walkers");
dump.push("DistWalkerHandler_SlaterDetWalker");
if(tag != std::string("")) dump.push(tag);
dump.write(Idata,"DistWalkerHandler_dims");
dump.write(bufferall,"DistWalkerHandler_walkers");
if(tag != std::string("")) dump.pop();
dump.pop();
dump.pop();
dump.flush();
}
*/
return true;
}
// Reads the handler configuration from the XML node `cur`.
// The "name" attribute and the parameters reset_weight, max_weight, min_weight,
// extra_spaces, walker_type and algo are picked up; walker_type defaults to
// "collinear" and algo to "all". Returns false for a NULL node or an
// unrecognised load balancing algorithm, true otherwise.
bool DistWalkerHandler::parse(xmlNodePtr cur)
{
  if(cur == NULL)
    return false;

  xmlNodePtr root = cur;

  // attributes of the node itself
  OhmmsAttributeSet attribs;
  attribs.add(name,"name");
  attribs.put(cur);

  // defaults, overridden by the parameter set below when present
  walkerType = "collinear";
  load_balance_alg = "all";

  ParameterSet params;
  params.add(reset_weight,"reset_weight","double");
  params.add(max_weight,"max_weight","double");
  params.add(min_weight,"min_weight","double");
  params.add(extra_empty_spaces,"extra_spaces","int");
  params.add(walkerType,"walker_type","std::string");
  params.add(load_balance_alg,"algo","std::string");
  params.put(cur);

  // "iter", "iterative" and "seq" are all accepted spellings of the iterative algorithm
  const bool wants_all = (load_balance_alg == "all");
  const bool wants_iter = (load_balance_alg == "iter")
                       || (load_balance_alg == "iterative")
                       || (load_balance_alg == "seq");
  if(wants_all) {
    app_log()<<" Using all-to-all (gather/scatter) load balancing algorithm. " <<std::endl;
  } else if(wants_iter) {
    load_balance_alg = "iter";
    app_log()<<" Using iterative (1-to-1 communication based) load balancing algorithm. " <<std::endl;
  } else {
    std::cerr<<" Error: Unknown load balancing algorithm: " <<load_balance_alg <<std::endl;
    return false;
  }

  // walker type is compared in lower case later on
  std::transform(walkerType.begin(),walkerType.end(),walkerType.begin(),(int (*)(int)) tolower);

  // walk the child nodes; no child element is acted upon yet
  for(xmlNodePtr child = root->children; child != NULL; child = child->next) {
    std::string cname((const char*)(child->name));
    if(cname =="something") {
    }
  }
  return true;
}
// Replaces the stored reference matrix HFMat with HF.
// In debug builds the shapes of the two matrices are asserted to agree.
void DistWalkerHandler::setHF(const ComplexMatrix& HF)
{
  assert( HFMat.rows() == HF.rows() && HFMat.cols() == HF.cols() );
  HFMat = HF;
}
bool DistWalkerHandler::setup(int cr, int nc, int tgn, MPI_Comm heads_comm, MPI_Comm tg_comm, MPI_Comm node_comm, myTimer* timer)
{
LocalTimer=timer;
core_rank=cr;
ncores_per_TG=nc;
head = core_rank==0;
ncol=NAEA;
nrow=NMO;
if(walkerType == "closed_shell") {
type = 1;
} else if(walkerType == "collinear") {
type = 2;
nrow*=2;
} else if(walkerType == "non-collinear") {
type = 4;
nrow*=2;
ncol*=2;
} else {
app_error()<<" Error: Incorrect walker type on DistWalkerHandler::setup \n";
return false;
}
// Stored data [all assumed std::complex numbers]:
// - INFO: 1 (e.g. alive, init, etc)
// - SlaterMatrix: NCOL*NROW
// type = 1 for closed_shell
// type = 2 for non-closed shell collinear
// type = 4 for non-collinear
// - weight: 1
// - eloc: 2
// - eloc_old: 2
// - overlap_alpha: 2
// - overlap_beta: 2
// - old_overlap_alpha: 2
// - old_overlap_beta: 2
// Total: 14+NROW*NCOL
int cnt=0;
data_displ[0] = cnt; cnt++;
data_displ[1] = cnt; cnt+=nrow*ncol;
data_displ[2] = cnt; cnt+=1; // weight
data_displ[3] = cnt; cnt+=2; // eloc
data_displ[4] = cnt; cnt+=2; // eloc_old
data_displ[5] = cnt; cnt+=2; // overlap_alpha
data_displ[6] = cnt; cnt+=2; // overlap_beta
data_displ[7] = cnt; cnt+=2; // old_overlap_alpha
data_displ[8] = cnt; cnt+=2; // old_overlap_beta
walker_size = cnt;
walker_memory_usage = walker_size*sizeof(ComplexType);
walkers.setup(head,std::string("DistWalkerHandler_")+std::to_string(tgn),tg_comm);
tot_num_walkers=maximum_num_walkers=0;
MPI_COMM_TG_LOCAL = tg_comm;
MPI_COMM_TG_LOCAL_HEADS = heads_comm;
#if defined(HAVE_MPI)
MPI_Comm_size(MPI_COMM_TG_LOCAL_HEADS,&nproc_heads);
MPI_Comm_rank(MPI_COMM_TG_LOCAL_HEADS,&rank_heads);
#else
nproc_heads=rank_heads=0;
#endif
empty_spots.reserve(1000);
outgoing.reserve(nproc_heads);
incoming.reserve(nproc_heads);
counts.resize(nproc_heads);
displ.resize(nproc_heads);
nwalk_counts_old.resize(nproc_heads);
nwalk_counts_new.resize(nproc_heads);
// default setup
HFMat.resize(nrow,ncol);
for(int i=0; i<nrow; i++)
for(int j=0; j<ncol; j++)
HFMat(i,j) = ComplexType(0.0,0.0);
for(int j=0; j<ncol; j++)
HFMat(j,j) = ComplexType(1.0,0.0);
if(type == 2)
for(int j=0; j<NAEB; j++)
HFMat(j+NMO,j) = ComplexType(1.0,0.0);
// setup buffers for dump and communication
min_weight = std::max(std::abs(min_weight),1e-2);
return true;
}
// Cleans the state of the object by clearing the walker container.
// Always returns true.
bool DistWalkerHandler::clean()
{
  walkers.clear();
  // The function is declared bool; flowing off the end without a return
  // statement is undefined behaviour, so report success explicitly.
  return true;
}
// called at the beginning of each executable section
// Sets the number of live walkers to n.
//   n - target number of walkers
//   a - not used in this function
//   S - matrix copied into newly created walkers; HFMat is used when S is NULL
// If n+extra_empty_spaces exceeds the current capacity the container is resized
// (by every core); the head core additionally preserves the live walkers, then
// activates empty slots or deactivates live ones until exactly n are alive.
// Aborts if the target count cannot be reached.
void DistWalkerHandler::resetNumberWalkers(int n, bool a, ComplexMatrix* S)
{
if(head) {
ComplexMatrix* S0 = S;
if(S0==NULL) S0 = &HFMat;
// assuming everything is alive
int ns = size();
if(n+extra_empty_spaces > ns) {
std::vector<ComplexType> tmp(tot_num_walkers*walker_size); // store old walker info
int cnt=0;
// keep only walkers whose INFO entry marks them alive, packed contiguously in tmp
for(int i=0; i<tot_num_walkers; i++)
if(walkers[walker_size*i+data_displ[INFO]].real() > 0)
std::copy(walkers.begin()+walker_size*i,walkers.begin()+walker_size*(i+1),tmp.begin()+(cnt++)*walker_size);
walkers.resize((n+extra_empty_spaces)*walker_size);
// at most n of the surviving walkers are copied back
tot_num_walkers = std::min(n,cnt);
std::copy(tmp.begin(),tmp.begin()+walker_size*tot_num_walkers,walkers.begin());
// every remaining slot is flagged as empty (INFO = -1)
for(int i=tot_num_walkers; i<n+extra_empty_spaces; i++)
walkers[walker_size*i+data_displ[INFO]] = ComplexType(-1.0);
}
// now I have enough space, add or remove walkers if needed
if(tot_num_walkers < n) {
// adding walkers
int cnt=tot_num_walkers;
for(int i=0; i<size(); i++) {
if(walkers[walker_size*i+data_displ[INFO]].real() < 0) {
// activate the empty slot: copy S0 into the Slater matrix, unit weight,
// and zero the first entries of eloc / overlap_alpha / overlap_beta
walkers[walker_size*i+data_displ[INFO]] = ComplexType(1.0);
std::copy(S0->begin(),S0->end(),walkers.begin()+walker_size*i+data_displ[SM]);
*(walkers.begin()+walker_size*i+data_displ[WEIGHT]) = ComplexType(1.0);
*(walkers.begin()+walker_size*i+data_displ[ELOC]) = ComplexType(0);
*(walkers.begin()+walker_size*i+data_displ[OVLP_A]) = ComplexType(0);
*(walkers.begin()+walker_size*i+data_displ[OVLP_B]) = ComplexType(0);
cnt++;
}
if(cnt == n) break;
}
if(cnt != n) APP_ABORT("Error: Problems in DistWalkerHandler::resetNumberWalkers-add. \n");
} else if(tot_num_walkers > n) {
// removing walkers
// scan from the back and flag live walkers as empty until n remain
int cnt=tot_num_walkers;
for(int i=size()-1; i>=0; i--) {
if(walkers[walker_size*i+data_displ[INFO]].real() > 0) {
walkers[walker_size*i+data_displ[INFO]] = ComplexType(-1.0);
cnt--;
}
if(cnt == n) break;
}
if(cnt != n) APP_ABORT("Error: Problems in DistWalkerHandler::resetNumberWalkers-remove. \n");
}
} else {
// non-head cores only take part in the resize, under the same condition as the head
int ns = size();
if(n+extra_empty_spaces > ns)
walkers.resize((n+extra_empty_spaces)*walker_size);
}
walkers.barrier();
maximum_num_walkers = n+extra_empty_spaces;
tot_num_walkers = n;
}
// load balancing algorithm
// Evens out the number of walkers held by the head cores of MPI_COMM_TG_LOCAL_HEADS.
// After the call each head targets nwalk_global/nproc_heads walkers, with the first
// nwalk_global%nproc_heads ranks taking one extra. The whole body is compiled out
// when HAVE_MPI is not defined. Only the "all" (gatherv/scatterv through rank 0 of
// the heads) algorithm is implemented; the "iter" branch is empty.
// On exit tot_num_walkers is recounted from the INFO entries.
void DistWalkerHandler::loadBalance()
{
#if defined(HAVE_MPI)
nwalk_min = nwalk_max= tot_num_walkers;
// a single head has nobody to exchange walkers with
if(nproc_heads==1) return;
int nW = numWalkers();
int sz = size(), nw_new=0;
MPI_Request request;
LocalTimer->start("WalkerHandler::loadBalance::setup");
// determine new number of walkers
if(head) {
for(int i=0; i<nproc_heads; i++)
nwalk_counts_old[i] = nwalk_counts_new[i] = 0;
// element 0 is used as the one-int send buffer for the allgather below
nwalk_counts_new[0] = nW;
// in case iallgather is not available
//myComm->allgather(nwalk_counts_new,nwalk_counts_old,1,MPI_COMM_TG_LOCAL_HEADS);
#if MPI_VERSION >= 3
#define HAVE_MPI_IALLGATHER
#endif
#ifdef HAVE_MPI_IALLGATHER
MPI_Iallgather(nwalk_counts_new.data(), 1, MPI_INT, nwalk_counts_old.data(), 1, MPI_INT, MPI_COMM_TG_LOCAL_HEADS,&request);
// push empty spots to the end of the list
push_walkers_to_front();
// wait for mpi_iallgather
MPI_Wait(&request, MPI_STATUS_IGNORE);
#else
MPI_Allgather(nwalk_counts_new.data(), 1, MPI_INT, nwalk_counts_old.data(), 1, MPI_INT, MPI_COMM_TG_LOCAL_HEADS);
push_walkers_to_front();
#endif
// total population over all heads, then the even split (remainder goes to the lowest ranks)
nwalk_global=0;
for(int i=0; i<nproc_heads; i++)
nwalk_global+=nwalk_counts_old[i];
for(int i=0; i<nproc_heads; i++)
nwalk_counts_new[i] = nwalk_global/nproc_heads + ((i<nwalk_global%nproc_heads)?(1):(0));
auto min_max = std::minmax_element(nwalk_counts_old.begin(),nwalk_counts_old.end());
nwalk_min = *min_max.first;
nwalk_max = *min_max.second;
nw_new = nwalk_counts_new[rank_heads];
//if(rank()==0)
// for(int i=0; i<nproc_heads; i++)
// app_log()<<i <<" " <<nwalk_counts_old[i] <<" " <<nwalk_min <<" " <<nwalk_max <<" " <<nw_new <<std::endl;
//app_log()<<" Before: " <<std::endl;
//cout<<rank() <<" " <<nW <<" " <<nw_new <<" " <<nwalk_global <<std::endl;
//MPI_Barrier(MPI_COMM_TG_LOCAL_HEADS);
}
LocalTimer->stop("WalkerHandler::loadBalance::setup");
LocalTimer->start("WalkerHandler::loadBalance::resize");
// resize arrays if necessary. This requires all cores in a TG
walkers.share(&nw_new,1,head);
if(nw_new > sz) walkers.resize(walker_size*(nw_new+extra_empty_spaces));
LocalTimer->stop("WalkerHandler::loadBalance::resize");
LocalTimer->start("WalkerHandler::loadBalance::exchange");
if(load_balance_alg == "all") {
if(head) {
// ncomm > 0: this head has surplus walkers to send; ncomm < 0: it must receive some
int ncomm = nW - nwalk_counts_new[rank_heads];
int pos = 0, cnt=0;
char *buffer;
// senders point at the first surplus walker, receivers at the first slot past their walkers
// (this relies on live walkers being packed at the front of the container)
if(ncomm > 0)
buffer = reinterpret_cast<char*>( &( *(walkers.begin() + walker_size*nwalk_counts_new[rank_heads]) ) );
else
buffer = reinterpret_cast<char*>( &( *(walkers.begin() + walker_size*nW) ) );
if(rank_heads==0) {
// setup gatherv call
// counts/displ describe, per rank, the surplus walker data collected on rank 0
displ[0]=0;
for(int i=0; i<nproc_heads-1; i++)
if(nwalk_counts_old[i] > nwalk_counts_new[i]) {
counts[i] = (nwalk_counts_old[i]-nwalk_counts_new[i])*walker_size;
displ[i+1] = displ[i]+counts[i];
} else {
counts[i]=0;
displ[i+1] = displ[i];
}
if(nwalk_counts_old[nproc_heads-1] > nwalk_counts_new[nproc_heads-1])
counts[nproc_heads-1] = (nwalk_counts_old[nproc_heads-1]-nwalk_counts_new[nproc_heads-1])*walker_size;
else
counts[nproc_heads-1]=0;
bufferall.clear();
bufferall.resize(displ[nproc_heads-1]+counts[nproc_heads-1]);
int nn = walker_size*((ncomm>0)?ncomm:0);
myComm->gatherv( buffer, bufferall.data(), nn , counts, displ, 0, MPI_COMM_TG_LOCAL_HEADS);
// setup scatterv call
// counts/displ now describe, per rank, the deficit to be filled from bufferall
displ[0]=0;
for(int i=0; i<nproc_heads-1; i++)
if(nwalk_counts_old[i] < nwalk_counts_new[i]) {
counts[i] = (nwalk_counts_new[i]-nwalk_counts_old[i])*walker_size;
displ[i+1] = displ[i]+counts[i];
} else {
counts[i]=0;
displ[i+1] = displ[i];
}
if(nwalk_counts_old[nproc_heads-1] < nwalk_counts_new[nproc_heads-1])
counts[nproc_heads-1] = (nwalk_counts_new[nproc_heads-1]-nwalk_counts_old[nproc_heads-1])*walker_size;
else
counts[nproc_heads-1]=0;
nn = walker_size*((ncomm<0)?(std::abs(ncomm)):0);
myComm->scatterv( bufferall.data(), buffer, nn, counts, displ, 0, MPI_COMM_TG_LOCAL_HEADS);
if(ncomm > 0) {
// flag every slot past the new walker count as empty; note the local named
// "zero" actually holds -1 and hides the class member of the same name
register ComplexType zero = ComplexType(-1.0,0.0);
for(ComplexSMVector::iterator it=walkers.begin()+nwalk_counts_new[rank_heads]*walker_size; it<walkers.end(); it+=walker_size)
*(it+data_displ[INFO]) = zero;
}
} else {
// non-root heads: contribute surplus to the gather, receive deficit from the scatter
int nn = walker_size*((ncomm>0)?ncomm:0);
myComm->gatherv( buffer, bufferall.data(), nn, counts, displ, 0, MPI_COMM_TG_LOCAL_HEADS);
nn = walker_size*((ncomm<0)?(std::abs(ncomm)):0);
myComm->scatterv( bufferall.data(), buffer, nn, counts, displ, 0, MPI_COMM_TG_LOCAL_HEADS);
if(ncomm > 0) {
// same clean-up as on rank 0: slots past the new count become empty (INFO = -1)
register ComplexType zero = ComplexType(-1.0,0.0);
for(ComplexSMVector::iterator it=walkers.begin()+nwalk_counts_new[rank_heads]*walker_size; it<walkers.end(); it+=walker_size)
*(it+data_displ[INFO]) = zero;
}
}
} // head
} else if(load_balance_alg == "iter") {
// 1. (only heads) exchange data directly into SM without need for temporary
// - Implement both 1. stupid gather/scatter algorithm
// 2. clever algorithm based on 1-1 communications
}
LocalTimer->stop("WalkerHandler::loadBalance::exchange");
walkers.barrier();
// recount live walkers from their INFO entries
tot_num_walkers=0;
for(ComplexSMVector::iterator it=walkers.begin()+data_displ[INFO]; it<walkers.end(); it+=walker_size)
if(it->real() > 0) tot_num_walkers++;
//if(head) {
//app_log()<<" After: " <<std::endl;
//cout<<rank() <<" " <<tot_num_walkers <<std::endl;
//MPI_Barrier(MPI_COMM_TG_LOCAL_HEADS);
//}
#endif
}
// moves all walkers to the front and empty spots to the end
void DistWalkerHandler::push_walkers_to_front()
{
if(!head) return;
ComplexSMVector::iterator empty = walkers.begin(), wlk = walkers.end()-walker_size;
// 0. while wlk != empty
// 1. find next walker from the end
// 2. find next empty from front
// 3. swap
while( wlk < empty ) {
// 1. find next walker
while( (wlk+data_displ[INFO])->real() < 0 && wlk > empty )
wlk -= walker_size;
if(wlk <= empty)
return;
// 2. find next walker
while( (empty+data_displ[INFO])->real() > 0 && empty < wlk )
empty += walker_size;
if(wlk <= empty)
return;
// 3. swap
std::copy(wlk,wlk+walker_size,empty);
*(wlk+data_displ[INFO]) = ComplexType(-1.0,0.0);
}
}
// population control algorithm
// Population control.
// On the head core: (1) slots that are empty or whose |weight| <= 1e-6 are recorded
// as empty; walkers with |weight| < |min_weight| are stochastically either killed or
// given weight reset_weight; (2) walkers with |weight| > |max_weight| are split into
// copies of weight magnitude reset_weight placed in empty slots, enlarging the
// container first when there are not enough empty slots.
// Non-head cores only learn (through walkers.share) whether a resize is required
// and take part in it. Finally every core recounts tot_num_walkers.
void DistWalkerHandler::popControl()
{
ComplexType minus = ComplexType(-1.0,0.0);
// do I need to sync here?
walkers.barrier();
// NOTE(review): buff_reset is assigned below but never read in this function
int buff_reset = 0;
if( head ) {
int max=size(), nw=0, cnt=0;
empty_spots.clear();
for(ComplexSMVector::iterator it=walkers.begin(); it<walkers.end(); it+=walker_size, cnt++) {
if( (it+data_displ[INFO])->real() < 0 || std::abs(*(it+data_displ[WEIGHT])) <= 1e-6) {
// walker is not alive
empty_spots.push_back(cnt);
*(it+data_displ[INFO]) = minus;
} else {
ComplexType w0 = *(it+data_displ[WEIGHT]);
// the else below pairs with the inner if: a low-weight walker is either
// killed (when the random draw plus |w0|/reset_weight truncates to 0)
// or kept with its weight set to reset_weight
if( std::abs(w0) < std::abs(min_weight))
if( (int)(distribution(generator) + std::abs(w0)/reset_weight) == 0 ) {
*(it+data_displ[INFO]) = minus;
empty_spots.push_back(cnt);
} else {
*(it+data_displ[WEIGHT]) = reset_weight;
}
}
}
// number of walkers after killing small weights
nw = max - empty_spots.size();
// tentative algorithm to avoid multiple memory reallocations
// 1. go through the list and decide how many copies of a walker are needed. Use upper bound
// 2. Expand memory allocation if necessary
// 3. Make copies of walkers
int num_new=0;
for(ComplexSMVector::iterator it=walkers.begin(); it<walkers.end(); it+=walker_size)
if( (it+data_displ[INFO])->real() > 0 && std::abs(*(it+data_displ[WEIGHT])) > std::abs(max_weight))
num_new += std::floor( std::abs( *(it+data_displ[WEIGHT]) ) / std::abs(reset_weight) ); // since I only need n-1 copies
if( num_new > empty_spots.size() ) {
buff_reset = 1; // reset SHM pointers when done
int ntot = nw + num_new + extra_empty_spaces;
std::vector<ComplexType> tmp(nw*walker_size); // store old walker info
cnt=0;
// pack the live walkers contiguously into tmp before the container is resized
for(ComplexSMVector::iterator it=walkers.begin(); it<walkers.end(); it+=walker_size)
if( (it+data_displ[INFO])->real() > 0 )
std::copy( it, it+walker_size, tmp.begin()+(cnt++)*walker_size);
// tell the other cores the new container length so they can join the resize
int nn = ntot*walker_size;
walkers.share(&nn,1,head);
walkers.resize(ntot*walker_size,false);
std::copy(tmp.begin(),tmp.begin()+walker_size*nw,walkers.begin());
// everything after the nw restored walkers is flagged empty
for(ComplexSMVector::iterator it=walkers.begin()+nw*walker_size; it<walkers.end(); it+=walker_size)
*(it+data_displ[INFO]) = minus;
empty_spots.clear();
int newsz = size();
empty_spots.reserve( newsz-nw );
for(int i=nw; i<newsz; i++) empty_spots.push_back(i);
} else {
// a shared value of 0 signals "no resize needed" to the other cores
int nn=0;
walkers.share<int>(&nn,1,head);
}
// insert new walkers in empty spots
cnt=0;
for(ComplexSMVector::iterator it=walkers.begin(); it<walkers.end(); it+=walker_size)
if( (it+data_displ[INFO])->real() > 0 && std::abs(*(it+data_displ[WEIGHT])) > std::abs(max_weight)) {
// n = integer part of w/|reset_weight|, incremented stochastically according to the remainder
RealType w = std::abs(*(it+data_displ[WEIGHT]));
int n = (int) (w/std::abs(reset_weight));
RealType rem = w-n*std::abs(reset_weight);
if( ( (int)(distribution(generator) + std::abs(rem/reset_weight) ) ) != 0 ) n++;
// rescale the weight by reset_weight/w, then place n-1 copies in empty spots
*(it+data_displ[WEIGHT]) *= reset_weight/w;
for(int i=0; i<n-1; i++,cnt++)
std::copy( it, it+walker_size, walkers.begin()+walker_size*empty_spots[cnt] );
}
empty_spots.clear();
} else {
// non-head cores: receive the new container length from the head (0 = unchanged)
int nn;
walkers.share<int>(&nn,1,head);
if(nn > 0)
walkers.resize(nn,false);
}
walkers.barrier();
// recount live walkers from their INFO entries
tot_num_walkers=0;
for(ComplexSMVector::iterator it=walkers.begin()+data_displ[INFO]; it<walkers.end(); it+=walker_size)
if(it->real() > 0) tot_num_walkers++;
}
}

View File

@ -0,0 +1,323 @@
// -*- C++ -*-
// /**@file DistWalkerHandler
// * @brief Walker handler (derived from WalkerHandlerBase) that stores all walker data in a ComplexSMVector.
// */
#ifndef QMCPLUSPLUS_AFQMC_DISTWALKERHANDLER_H
#define QMCPLUSPLUS_AFQMC_DISTWALKERHANDLER_H
#include<random>
#include "OhmmsData/libxmldefs.h"
#include "io/hdf_archive.h"
#include"AFQMC/config.h"
#include"AFQMC/Walkers/WalkerHandlerBase.h"
#include<Message/MPIObjectBase.h>
#include "Message/CommOperators.h"
#include "AFQMC/Numerics/DenseMatrixOperations.h"
namespace qmcplusplus
{
/*
* Class that contains and handles walkers.
* Implements communication, load balancing, and I/O operations.
* Walkers are always accessed through the handler.
*/
// Walker handler that keeps every walker as a fixed-length record of ComplexType
// values inside a single ComplexSMVector. Record n occupies
// [walker_size*n, walker_size*(n+1)) and its fields are located through
// data_displ[] indexed by the walker_data enum.
class DistWalkerHandler: public WalkerHandlerBase
{
  public:

  /// constructor
  /// Members are initialised in declaration order. Everything that is only
  /// assigned in setup() gets a defined value here so that the object is in a
  /// well-defined (empty) state before setup() is called.
  DistWalkerHandler(Communicate* c): WalkerHandlerBase(c,std::string("DistWalkerHandler")),
      distribution(0.0,1.0),
      type(0),nrow(0),ncol(0),
      walker_size(0),data_displ(),walker_memory_usage(0),
      tot_num_walkers(0),maximum_num_walkers(0),
      min_weight(0.1),max_weight(10.0),reset_weight(5.0),
      extra_empty_spaces(10),
      head(false),
      nproc_heads(0),rank_heads(0),
      LocalTimer(NULL)
  { }

  /// destructor
  ~DistWalkerHandler() {}

  /// number of walker slots (live or empty) in the container; 0 before setup()
  /// has defined walker_size
  inline int size() { return (walker_size > 0) ? walkers.size()/walker_size : 0; }

  bool restartFromXML();
  bool dumpToXML();

  bool restartFromHDF5(int n, hdf_archive&, const std::string&, bool set_to_target);
  bool dumpToHDF5(hdf_archive&, const std::string&);

  /// not implemented: prints a notice and reports success without writing anything
  bool dumpSamplesHDF5(hdf_archive& dump, int nW) {
    std::cerr<<" DistWalkerHandler::dumpSamplesHDF5() not implemented. Not writing anything. \n";
    return true;
  }

  // reads xml and performs setup
  bool parse(xmlNodePtr cur);

  // performs setup
  bool setup(int,int,int,MPI_Comm,MPI_Comm,MPI_Comm,myTimer*);

  // cleans state of object.
  // -erases allocated memory
  bool clean();

  // called at the beginning of each executable section
  void resetNumberWalkers(int n, bool a=true, ComplexMatrix* S=NULL);

  /// resets the population to n walkers, new ones being initialised from HFMat
  inline bool initWalkers(int n) {
    resetNumberWalkers(n,true,&HFMat);
    // walkers.resize(n);
    // for(int i=0; i<n; i++) walkers[i].initWalker(HFMat);
    return true;
  }

  /// Returns tot_num_walkers (or size() when dummy is true) after verifying
  /// that tot_num_walkers agrees with the number of slots flagged alive;
  /// aborts on a mismatch.
  inline int numWalkers(bool dummy=false) {
    // checking
    int cnt=0;
    for(ComplexSMVector::iterator it=walkers.begin()+data_displ[INFO];
        it<walkers.end(); it+=walker_size)
      if(it->real() > 0) cnt++;
    if(cnt != tot_num_walkers)
      APP_ABORT(" Error in DistWalkerHandler::numWalkers(): Incorrect number of walkers. \n");
    if(dummy)
      return size();
    else
      return tot_num_walkers;
  }

  /// sum of tot_num_walkers over the head cores (via myComm->gsum)
  inline int GlobalPopulation() {
    std::vector<int> res(1);
    res[0]=0;
    if(head)
      res[0] += tot_num_walkers;
    myComm->gsum(res);
    return res[0];
  }

  /// sum of |weight| over the walkers of all head cores (via myComm->gsum)
  inline RealType GlobalWeight() {
    std::vector<RealType> res(1);
    res[0]=0;
    if(head)
      for(int i=0; i<tot_num_walkers; i++)
        res[0] += std::abs(getWeight(i));
    myComm->gsum(res);
    return res[0];
  }

  // load balancing algorithm
  void loadBalance();

  // population control algorithm
  void popControl();

  void setHF(const ComplexMatrix& HF);

  /// Orthogonalizes the Slater matrix of walker i with GeneralizedGramSchmidt.
  /// For "collinear" walkers a second call handles the block that starts
  /// NAEA*NMO elements further on; "non-collinear" aborts (not implemented).
  inline void Orthogonalize(int i) {
    if(walkerType == "closed_shell") {
      DenseMatrixOperators::GeneralizedGramSchmidt(&(walkers[walker_size*i+data_displ[SM]]),NAEA,NMO,NAEA);
    } else if(walkerType == "collinear") {
      DenseMatrixOperators::GeneralizedGramSchmidt(&(walkers[walker_size*i+data_displ[SM]]),NAEA,NMO,NAEA);
      DenseMatrixOperators::GeneralizedGramSchmidt(&((walkers)[walker_size*i+data_displ[SM]])+NAEA*NMO,NAEA,NMO,NAEB);
    } else if(walkerType == "non-collinear") {
      APP_ABORT("ERROR: non-collinear not implemented in Orthogonalize. \n\n\n");
    }
  }

  /// Orthogonalizes all walkers; walker i is handled by the core whose
  /// core_rank equals i modulo ncores_per_TG.
  inline void Orthogonalize() {
    for(int i=0; i<tot_num_walkers; i++)
      if( i%ncores_per_TG == core_rank)
        Orthogonalize(i);
  }

  //private:

  // using std::random for simplicity now
  std::default_random_engine generator;
  std::uniform_real_distribution<double> distribution;

  // indices into data_displ[] for the fields of a walker record
  enum walker_data { INFO=0, SM=1, WEIGHT=2, ELOC=3, ELOC_OLD=4, OVLP_A=5, OVLP_B=6, OLD_OVLP_A=7, OLD_OVLP_B=8};

  // Accessors. n is zero-based. Getters return NULL / zero and setters do
  // nothing when n >= tot_num_walkers.
  ComplexType* getSM(int n) { if(n>=tot_num_walkers) {return NULL;} else {return &((walkers)[walker_size*n+data_displ[SM]]);} }
  ComplexType getWeight(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[WEIGHT]];} }
  ComplexType getEloc(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[ELOC]];} }
  ComplexType getOldEloc(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[ELOC_OLD]];} }
  ComplexType getOvlpAlpha(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OVLP_A]];} }
  ComplexType getOvlpBeta(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OVLP_B]];} }
  ComplexType getOldOvlpAlpha(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OLD_OVLP_A]];} }
  ComplexType getOldOvlpBeta(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OLD_OVLP_B]];} }
  void setWeight(int n, ComplexType Q) { if(n >= 0 && n<tot_num_walkers) {(walkers)[walker_size*n+data_displ[WEIGHT]]=Q;} }
  void setEloc(int n, ComplexType Q) { if(n>=tot_num_walkers) {return;} else { (walkers)[walker_size*n+data_displ[ELOC]] = Q;} }
  void setOldEloc(int n, ComplexType Q) { if(n>=tot_num_walkers) {return;} else { (walkers)[walker_size*n+data_displ[ELOC_OLD]] = Q;} }
  void setOvlp(int n, ComplexType Q1, ComplexType Q2) {
    if(n>=tot_num_walkers) {return;}
    else {
      (walkers)[walker_size*n+data_displ[OVLP_A]] = Q1;
      (walkers)[walker_size*n+data_displ[OVLP_B]] = Q2;
    }
  }
  void setOldOvlp(int n, ComplexType Q1, ComplexType Q2) {
    if(n>=tot_num_walkers) {return;}
    else {
      (walkers)[walker_size*n+data_displ[OLD_OVLP_A]] = Q1;
      (walkers)[walker_size*n+data_displ[OLD_OVLP_B]] = Q2;
    }
  }
  /// copies the current eloc / overlaps into the corresponding "old" fields
  void setCurrToOld(int n) {
    if(n>=tot_num_walkers) {return;}
    else {
      (walkers)[walker_size*n+data_displ[ELOC_OLD]] = (walkers)[walker_size*n+data_displ[ELOC]];
      (walkers)[walker_size*n+data_displ[OLD_OVLP_A]] = (walkers)[walker_size*n+data_displ[OVLP_A]];
      (walkers)[walker_size*n+data_displ[OLD_OVLP_B]] = (walkers)[walker_size*n+data_displ[OVLP_B]];
    }
  }

  // The "...2" variants address the second entry (+1) of the two-entry fields.
  ComplexType getEloc2(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[ELOC]+1];} }
  ComplexType getOldEloc2(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[ELOC_OLD]+1];} }
  ComplexType getOvlpAlpha2(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OVLP_A]+1];} }
  ComplexType getOvlpBeta2(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OVLP_B]+1];} }
  ComplexType getOldOvlpAlpha2(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OLD_OVLP_A]+1];} }
  ComplexType getOldOvlpBeta2(int n) { if(n>=tot_num_walkers) {return zero;} else {return (walkers)[walker_size*n+data_displ[OLD_OVLP_B]+1];} }
  void setEloc2(int n, ComplexType Q) { if(n>=tot_num_walkers) {return;} else { (walkers)[walker_size*n+data_displ[ELOC]+1] = Q;} }
  void setOldEloc2(int n, ComplexType Q) { if(n>=tot_num_walkers) {return;} else { (walkers)[walker_size*n+data_displ[ELOC_OLD]+1] = Q;} }
  void setOvlp2(int n, ComplexType Q1, ComplexType Q2) {
    if(n>=tot_num_walkers) {return;}
    else {
      (walkers)[walker_size*n+data_displ[OVLP_A]+1] = Q1;
      (walkers)[walker_size*n+data_displ[OVLP_B]+1] = Q2;
    }
  }
  void setOldOvlp2(int n, ComplexType Q1, ComplexType Q2) {
    if(n>=tot_num_walkers) {return;}
    else {
      (walkers)[walker_size*n+data_displ[OLD_OVLP_A]+1] = Q1;
      (walkers)[walker_size*n+data_displ[OLD_OVLP_B]+1] = Q2;
    }
  }
  void setCurrToOld2(int n) {
    if(n>=tot_num_walkers) {return;}
    else {
      (walkers)[walker_size*n+data_displ[ELOC_OLD]+1] = (walkers)[walker_size*n+data_displ[ELOC]+1];
      (walkers)[walker_size*n+data_displ[OLD_OVLP_A]+1] = (walkers)[walker_size*n+data_displ[OVLP_A]+1];
      (walkers)[walker_size*n+data_displ[OLD_OVLP_B]+1] = (walkers)[walker_size*n+data_displ[OVLP_B]+1];
    }
  }

  void setWalker(int n, ComplexType eloc, ComplexType oa, ComplexType ob) {
    if(n>=tot_num_walkers) {return;}
    (walkers)[walker_size*n+data_displ[ELOC]] = eloc;
    (walkers)[walker_size*n+data_displ[OVLP_A]] = oa;
    (walkers)[walker_size*n+data_displ[OVLP_B]] = ob;
  }
  void setWalker(int n, ComplexType w0 ,ComplexType eloc) {
    if(n>=tot_num_walkers) {return;}
    (walkers)[walker_size*n+data_displ[WEIGHT]] = w0;
    (walkers)[walker_size*n+data_displ[ELOC]] = eloc;
  }
  void setWalker(int n, ComplexType w0 ,ComplexType eloc, ComplexType oa, ComplexType ob) {
    if(n>=tot_num_walkers) {return;}
    (walkers)[walker_size*n+data_displ[WEIGHT]] = w0;
    (walkers)[walker_size*n+data_displ[ELOC]] = eloc;
    (walkers)[walker_size*n+data_displ[OVLP_A]] = oa;
    (walkers)[walker_size*n+data_displ[OVLP_B]] = ob;
  }
  void setWalker2(int n, ComplexType eloc, ComplexType oa, ComplexType ob) {
    if(n>=tot_num_walkers) {return;}
    (walkers)[walker_size*n+data_displ[ELOC]+1] = eloc;
    (walkers)[walker_size*n+data_displ[OVLP_A]+1] = oa;
    (walkers)[walker_size*n+data_displ[OVLP_B]+1] = ob;
  }
  void getOldWalker(int n, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    if(n>=tot_num_walkers) {return ;}
    eloc = (walkers)[walker_size*n+data_displ[ELOC_OLD]];
    oa = (walkers)[walker_size*n+data_displ[OLD_OVLP_A]];
    ob = (walkers)[walker_size*n+data_displ[OLD_OVLP_B]];
  }
  /// fills eloc/oa/ob from walker n and returns a pointer to its Slater matrix
  ComplexType* getWalker(int n, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    if(n>=tot_num_walkers) {return NULL;}
    eloc = (walkers)[walker_size*n+data_displ[ELOC]];
    oa = (walkers)[walker_size*n+data_displ[OVLP_A]];
    ob = (walkers)[walker_size*n+data_displ[OVLP_B]];
    return &((walkers)[walker_size*n+data_displ[SM]]);
  }
  // NOTE(review): the returned pointer is offset by one element from the start
  // of the Slater matrix, unlike getWalker(); confirm that this is intended.
  ComplexType* getWalker2(int n, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    if(n>=tot_num_walkers) {return NULL;}
    eloc = (walkers)[walker_size*n+data_displ[ELOC]+1];
    oa = (walkers)[walker_size*n+data_displ[OVLP_A]+1];
    ob = (walkers)[walker_size*n+data_displ[OVLP_B]+1];
    return &((walkers)[walker_size*n+data_displ[SM]+1]);
  }
  ComplexType* getWalker(int n, ComplexType& w, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    if(n>=tot_num_walkers) {return NULL;}
    w = (walkers)[walker_size*n+data_displ[WEIGHT]];
    eloc = (walkers)[walker_size*n+data_displ[ELOC]];
    oa = (walkers)[walker_size*n+data_displ[OVLP_A]];
    ob = (walkers)[walker_size*n+data_displ[OVLP_B]];
    return &((walkers)[walker_size*n+data_displ[SM]]);
  }

  /// a walker is alive when the real part of its INFO entry is positive
  /// (n is not range-checked)
  bool isAlive(int n) {
    return ((walkers)[walker_size*n+data_displ[INFO]].real()) > 0 ; // for now
  }

  /// multiplies the weight of every walker by w0 (head core only)
  void scaleWeight(RealType w0) {
    if(!head) return;
    for(int i=0; i<tot_num_walkers; i++)
      (walkers)[walker_size*i+data_displ[WEIGHT]] *= w0;
  }

  void push_walkers_to_front();

  // Stored data [all assumed std::complex numbers]:
  // - INFO: 1 (e.g. alive, init, etc)
  // - SlaterMatrix: NCOL*NROW
  // type = 1 for closed_shell
  // type = 2 for non-closed shell collinear
  // type = 4 for non-collinear
  // - weight: 1
  // - eloc: 2
  // - eloc_old: 2
  // - overlap_alpha: 2
  // - overlap_beta: 2
  // - old_overlap_alpha: 2
  // - old_overlap_beta: 2
  // Total: 14+NROW*NCOL
  int type, nrow, ncol;
  int walker_size, data_displ[9], walker_memory_usage;

  int tot_num_walkers;
  int maximum_num_walkers;

  ComplexType zero = ComplexType(0.0,0.0);
  RealType min_weight, max_weight, reset_weight;

  int extra_empty_spaces;

  ComplexMatrix HFMat;

  std::vector<int> empty_spots;

  bool head;

  // container with walker data
  ComplexSMVector walkers;

  MPI_Comm MPI_COMM_TG_LOCAL;
  MPI_Comm MPI_COMM_TG_LOCAL_HEADS;
  int nproc_heads, rank_heads;
  std::vector<std::tuple<int,int>> outgoing, incoming;
  std::vector<int> counts,displ;
  std::vector<char> bufferall;

  myTimer* LocalTimer;
};
}
#endif

View File

@ -0,0 +1,642 @@
#include<cassert>
#include<random>
#include<cstdlib>
#include "Configuration.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "Message/CommOperators.h"
#include"AFQMC/Walkers/SlaterDetWalker.h"
#include"AFQMC/Walkers/LocalWalkerHandler.h"
namespace qmcplusplus
{
// Placeholder: no XML restart is performed; always reports success.
bool LocalWalkerHandler::restartFromXML()
{
  const bool status = true;
  return status;
}
// Placeholder: nothing is written to XML; always reports success.
bool LocalWalkerHandler::dumpToXML()
{
  const bool status = true;
  return status;
}
// not thinking about efficiency since this is only used once during initialization
/**
 * Restores the walker list from the group /Walkers/LocalWalkerHandler[/tag]
 * of an HDF5 archive.
 *
 * Rank 0 reads the complete walker buffer, decides how many walkers each rank
 * receives, keeps the first slice for itself and scatters the following,
 * non-overlapping slices to the other ranks.
 *
 * @param n             requested number of walkers per rank (only used when
 *                      set_to_target is true)
 * @param read          archive to read from (accessed on rank 0 only)
 * @param tag           optional sub-group name; empty string means none
 * @param set_to_target when true the local list is resized to n; slots beyond
 *                      the walkers received are filled by cycling over the
 *                      received walkers
 * @return true on success; false on rank 0 if the data cannot be read or the
 *         walker size stored in the file differs from the current one
 */
bool LocalWalkerHandler::restartFromHDF5(int n, hdf_archive& read, const std::string& tag, bool set_to_target)
{
  const int nproc = myComm->size();
  const bool is_root = (myComm->rank()==0);
  // a throw-away walker built from HFMat provides the serialized walker size
  Walker dummy;
  dummy.initWalker(HFMat,true);
  const int sz = dummy.sizeForDump();
  std::vector<int> from(nproc);
  std::vector<char> buffer;
  std::vector<char> bufferall;
  std::vector<int> counts(nproc), displ(nproc);
  if(is_root) {
    // NOTE(review): the early returns below happen on rank 0 only; the other
    // ranks are then left waiting in bcast(). Propagating the failure needs a
    // protocol change and is left for a follow-up.
    std::vector<int> Idata(2);
    std::string path = "/Walkers/LocalWalkerHandler";
    if(tag != std::string("")) path += std::string("/")+tag;
    if(!read.is_group( path )) {
      app_error()<<" ERROR: H5Group could not find /Walkers/LocalWalkerHandler/{tag} group in file. No restart data for walkers. \n";
      return false;
    }
    if(!read.push("Walkers")) return false;
    if(!read.push("LocalWalkerHandler")) return false;
    if(tag != std::string("")) if(!read.push(tag)) return false;
    if(!read.read(Idata,"LocalWalkerHandler_dims")) return false;
    const int nWtot=Idata[0];
    app_log()<<"Found " <<nWtot <<" walkers on restart file." <<std::endl;
    if(Idata[1] != sz) {
      app_error()<<" ERROR: Size of walker is not consistent in hdf5 file. \n";
      app_error()<<" sz_sim, sz_file: " <<sz <<" " <<Idata[1] <<std::endl;
      return false;
    }
    bufferall.resize(nWtot*sz);
    if(!read.read(bufferall,"LocalWalkerHandler_walkers")) return false;
    if(tag != std::string("")) read.pop();
    read.pop();
    read.pop();
    // even split of the stored walkers, capped at n per rank when requested
    int nWperProc = nWtot/nproc;
    int nWextra = nWtot%nproc;
    if( set_to_target && nWperProc >= n ) {
      nWperProc = n;
      nWextra = 0;
    }
    for(int i=0; i<nWextra; i++) from[i] = nWperProc+1;
    for(int i=nWextra; i<nproc; i++) from[i] = nWperProc;
    // Rank 0 unpacks its slice straight from bufferall, so it sends nothing to
    // itself. Every other rank gets the slice that starts right after the
    // walkers assigned to the lower ranks (previously rank 1 started at
    // offset 0 and duplicated rank 0's walkers).
    counts[0]=0;
    displ[0]=0;
    for(int i=1; i<nproc; i++) {
      counts[i] = from[i]*sz;
      displ[i] = displ[i-1]+from[i-1]*sz;
    }
  }
  myComm->bcast(from);
  const int nW = from[myComm->rank()];
  if(!is_root) buffer.resize(nW*sz);
  myComm->scatterv( bufferall, buffer, counts, displ, 0);
  // serialized walkers owned by this rank
  char* src = is_root ? bufferall.data() : buffer.data();
  walkers.clear();
  if(set_to_target) walkers.resize(n);
  else walkers.resize(nW);
  int cnt=0;
  for(WalkerIterator it=walkers.begin(); it!=walkers.begin()+nW; it++) {
    it->initWalker(HFMat);
    it->restartFromChar( src+cnt );
    cnt+=sz;
  }
  // fill any remaining slots by cycling over the walkers just restored
  cnt=0;
  for(WalkerIterator it=walkers.begin()+nW; it!=walkers.end(); it++) {
    it->initWalker(HFMat);
    *it = walkers[cnt];
    cnt++;
    if(cnt == nW) cnt=0;
  }
  return true;
}
/**
 * Writes all alive walkers of all ranks to the group
 * /Walkers/LocalWalkerHandler[/tag] of an HDF5 archive.
 *
 * Every rank serializes its alive walkers, the buffers are collected on rank 0
 * with a single gatherv and rank 0 writes two datasets:
 * "LocalWalkerHandler_dims" (total walker count, bytes per walker) and
 * "LocalWalkerHandler_walkers" (the raw bytes).
 *
 * @param dump archive to write to
 * @param tag  optional sub-group name; empty string means none
 * @return false if the target group already exists, true otherwise
 */
bool LocalWalkerHandler::dumpToHDF5(hdf_archive& dump, const std::string& tag)
{
  // check that restart data doesnot exist
  std::string path = "/Walkers/LocalWalkerHandler";
  if(tag != std::string("")) path += std::string("/")+tag;
  if(dump.is_group( path )) {
    app_error()<<" ERROR: H5Group /Walkers/LocalWalkerHandler/{tag} already exists in restart file. This is a bug and should not happen. Contact a developer.\n";
    return false;
  }
  // doing one big gatherV, not sure if this is the best way right now
  const int nproc = myComm->size();
  int nW = numWalkers();
  // Bytes per serialized walker. A rank may own no walkers at all; in that case
  // take the size from a walker built from HFMat (as restartFromHDF5 does)
  // instead of reading walkers[0] of an empty vector.
  int sz;
  if(walkers.empty()) {
    Walker dummy;
    dummy.initWalker(HFMat,true);
    sz = dummy.sizeForDump();
  } else {
    sz = walkers[0].sizeForDump();
  }
  std::vector<int> to(1,nW);
  std::vector<int> from(nproc);
  int nWtot = 0;
  myComm->allgather(to,from,1);
  for(int i=0; i<nproc; i++) nWtot += from[i];
  std::vector<char> buffer(nW*sz);
  std::vector<char> bufferall;
  if(myComm->rank() == 0)
    bufferall.resize(nWtot*sz);
  // pack the alive walkers back to back
  int cnt=0;
  for(WalkerIterator it=walkers.begin(); it!=walkers.end(); it++)
    if(it->alive) {
      it->dumpToChar( buffer.data()+cnt );
      cnt+=sz;
    }
  // convert walker counts to byte counts and build the gather displacements
  std::vector<int> displ(nproc);
  cnt=0;
  for(int i=0; i<nproc; i++) {
    from[i]*=sz;
    displ[i] = cnt;
    cnt+=from[i];
  }
  myComm->gatherv(buffer,bufferall,from,displ,0);
  if(myComm->rank()==0) {
    // now write to HDF5 file
    std::vector<int> Idata(2);
    Idata[0]=nWtot;
    Idata[1]=sz;
    dump.push("Walkers");
    dump.push("LocalWalkerHandler");
    if(tag != std::string("")) dump.push(tag);
    dump.write(Idata,"LocalWalkerHandler_dims");
    dump.write(bufferall,"LocalWalkerHandler_walkers");
    if(tag != std::string("")) dump.pop();
    dump.pop();
    dump.pop();
    dump.flush();
  }
  return true;
}
/**
 * Reads the handler configuration from an XML node.
 *
 * Attribute read: "name". Parameters read: "reset_weight", "max_weight",
 * "min_weight" and "extra_spaces" (stored in extra_empty_spaces).
 * Child elements are visited but none is acted upon yet.
 *
 * @param cur XML node of the handler
 * @return false when cur is NULL, true otherwise
 */
bool LocalWalkerHandler::parse(xmlNodePtr cur)
{
  if(cur == NULL) return false;

  OhmmsAttributeSet attributes;
  attributes.add(name,"name");
  attributes.put(cur);

  ParameterSet parameters;
  parameters.add(reset_weight,"reset_weight","double");
  parameters.add(max_weight,"max_weight","double");
  parameters.add(min_weight,"min_weight","double");
  parameters.add(extra_empty_spaces,"extra_spaces","int");
  parameters.put(cur);

  // walk the children; the branch below is a placeholder for future options
  for(xmlNodePtr child = cur->children; child != NULL; child = child->next) {
    std::string cname((const char*)(child->name));
    if(cname =="something") {
    }
  }
  return true;
}
// Replaces the stored reference matrix HFMat with a copy of HF
// (HFMat is resized to HF's dimensions first).
// The dimension check (rows == 2*NMO or 4*NMO, cols == NAEA) is currently disabled.
void LocalWalkerHandler::setHF(const ComplexMatrix& HF)
{
  HFMat.resize(HF.rows(), HF.cols());
  HFMat = HF;
}
bool LocalWalkerHandler::setup(int cr, int nc, int tgn, MPI_Comm heads_comm, MPI_Comm tg_comm, MPI_Comm node_comm, myTimer* timer)
{
LocalTimer=timer;
if(cr != 0) {
app_error()<<" Error: Found core_rank != 0 in LocalWalkerHandler::setup. \n";
APP_ABORT(" Error: Found ncores_per_TG != 1 in LocalWalkerHandler::setup. \n");
return false;
}
if(nc != 1) {
app_error()<<" Error: Found ncores_per_TG != 1 in LocalWalkerHandler::setup. \n";
APP_ABORT(" Error: Found ncores_per_TG != 1 in LocalWalkerHandler::setup. \n");
return false;
}
ncores_per_TG=1;
core_rank=0;
// default setup
HFMat.resize(2*NMO,NAEA);
for(int i=0; i<2*NMO; i++)
for(int j=0; j<NAEA; j++)
HFMat(i,j) = ComplexType(0.0,0.0);
for(int j=0; j<NAEA; j++)
HFMat(j,j) = ComplexType(1.0,0.0);
for(int j=0; j<NAEB; j++)
HFMat(j+NMO,j) = ComplexType(1.0,0.0);
// walkerSizeForCommunication
// walkerSizeForDump
// setup buffers for dump and communication
min_weight = std::max(std::abs(min_weight),1e-2);
return true;
}
bool LocalWalkerHandler::clean()
{
for(int i=0; i<walkers.size(); i++)
walkers[i].clear();
}
// called at the beginning of each executable section
void LocalWalkerHandler::resetNumberWalkers(int n, bool a, ComplexMatrix* S)
{
ComplexMatrix* S0 = S;
if(S0==NULL) S0 = &HFMat;
// assuming everything is alive
int nold = walkers.size();
if(nold > n ) {
walkers.erase(walkers.begin()+n,walkers.end());
} else {
walkers.resize(n);
for(int i=nold; i<n; i++)
walkers[i].initWalker(*S0,true);;
}
}
/*
void LocalWalkerHandler::resetNumberWalkers(int n, bool a, ComplexMatrix* S)
{
ComplexMatrix* S0 = S;
if(S0==NULL) S0 = &HFMat;
int nold = numWalkers();
int old_size = walkers.size();
assert( walkers.size() == nold+emptyWalkers.size() );
if(nold == n) {
// still resize to add empty spaces if needed
if(emptyWalkers.size() >= extra_empty_spaces) return;
int add = extra_empty_spaces - emptyWalkers.size();
emptyWalkers.reserve(2*extra_empty_spaces);
// careful with c++11 move semantics
walkers.resize(old_size+add);
for(int i=old_size; i<walkers.size(); i++) {
walkers[i].initWalker(*S0,false);
emptyWalkers.push_back(i);
}
// sort in decreasing order to keep list "tight"
std::sort(emptyWalkers.begin(),emptyWalkers.end(),sortDecreasing);
return;
}
if(nold == 0) {
int add = n+extra_empty_spaces - old_size;
emptyWalkers.reserve(2*extra_empty_spaces);
if(add > 0) {
walkers.resize(old_size+add);
for(int i=old_size; i<walkers.size(); i++) {
walkers[i].initWalker(*S0,false);
emptyWalkers.push_back(i);
}
}
// sort in decreasing order to keep list "tight"
std::sort(emptyWalkers.begin(),emptyWalkers.end(),sortDecreasing);
for(int i=0; i<n; i++) {
int pos = emptyWalkers.back();
emptyWalkers.pop_back();
walkers[pos].initWalker(*S0,a);
}
return;
}
if( nold > n ) {
int res=0;
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
if(it->alive)
if(res < n) res++;
else {
res++;
it->weight=RealType(0.0);
//it->weight=ComplexType(0.0);
it->alive=false;
emptyWalkers.push_back(std::distance(walkers.begin(),it));
}
int add = n+extra_empty_spaces - old_size;
emptyWalkers.reserve(2*extra_empty_spaces);
if(add > 0) {
walkers.resize(old_size+add);
for(int i=old_size; i<walkers.size(); i++) {
walkers[i].initWalker(*S0,false);
emptyWalkers.push_back(i);
}
}
// sort in decreasing order to keep list "tight"
std::sort(emptyWalkers.begin(),emptyWalkers.end(),sortDecreasing);
return;
}
if(nold < n) {
int add = n+extra_empty_spaces - old_size;
emptyWalkers.reserve(2*extra_empty_spaces);
if(S == NULL) {
for(int i=0; i<walkers.size(); i++)
if(walkers[i].alive) {
S0 = &(walkers[i].SlaterMat);
break;
}
}
if(add > 0) {
walkers.resize(old_size+add);
for(int i=old_size; i<walkers.size(); i++) {
walkers[i].initWalker(*S0,false);
emptyWalkers.push_back(i);
}
}
// sort in decreasing order to keep list "tight"
std::sort(emptyWalkers.begin(),emptyWalkers.end(),sortDecreasing);
for(int i=0; i<n-nold; i++) {
int pos = emptyWalkers.back();
emptyWalkers.pop_back();
walkers[pos].initWalker(*S0,a);
}
return;
}
}
*/
// load balancing algorithm
void LocalWalkerHandler::loadBalance()
{
int rank = myComm->rank();
int nproc = myComm->size();
if(nproc==1) return;
int nW = numWalkers();
int sz = walkers[0].sizeForDump();
nwalk_counts_old.resize(nproc);
nwalk_counts_new.resize(nproc);
nwalk_global=0;
std::vector<int> counts(nproc),displ(nproc);
for(int i=0; i<nproc; i++)
nwalk_counts_old[i] = nwalk_counts_new[i] = 0;
nwalk_counts_new[0] = nW;
myComm->allgather(nwalk_counts_new,nwalk_counts_old,1);
for(int i=0; i<nproc; i++)
nwalk_global+=nwalk_counts_old[i];
for(int i=0; i<nproc; i++)
nwalk_counts_new[i] = nwalk_global/nproc + ((i<nwalk_global%nproc)?(1):(0));
auto min_max = std::minmax_element(nwalk_counts_old.begin(),nwalk_counts_old.end());
nwalk_min = *min_max.first;
nwalk_max = *min_max.second;
/*
if(rank==0) {
app_log()<<"min,max: " <<nwalk_min <<" " <<nwalk_max <<std::endl;
for(int i=0; i<nproc; i++)
app_log()<<nwalk_counts_old[i] <<" ";
app_log()<<std::endl;
for(int i=0; i<nproc; i++)
app_log()<<nwalk_counts_new[i] <<" ";
app_log()<<std::endl;
}
*/
std::vector<char> buffer;
std::vector<char> bufferall;
int ncomm = nW - nwalk_counts_new[rank];
int pos = 0, cnt=0;
buffer.reserve(std::abs(sz*ncomm));
if(ncomm > 0) {
buffer.resize(sz*ncomm);
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it) {
if(it->alive) {
cnt++;
if(cnt > nwalk_counts_new[rank]) {
it->dumpToChar(buffer.data()+pos);
pos+=sz;
it->alive = false;
it->weight = ComplexType(0,0);
}
}
}
// until I code the use of it->alive properly
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ) {
if (!it->alive ) it=walkers.erase(it);
else ++it;
}
}
if(rank==0) {
// setup gatherv call
displ[0]=0;
for(int i=0; i<nproc-1; i++)
if(nwalk_counts_old[i] > nwalk_counts_new[i]) {
counts[i] = (nwalk_counts_old[i]-nwalk_counts_new[i])*sz;
displ[i+1] = displ[i]+counts[i];
} else {
counts[i]=0;
displ[i+1] = displ[i];
}
if(nwalk_counts_old[nproc-1] > nwalk_counts_new[nproc-1])
counts[nproc-1] = (nwalk_counts_old[nproc-1]-nwalk_counts_new[nproc-1])*sz;
else
counts[nproc-1]=0;
bufferall.resize(displ[nproc-1]+counts[nproc-1]);
myComm->gatherv( buffer, bufferall, counts, displ, 0);
// setup scatterv call
displ[0]=0;
for(int i=0; i<nproc-1; i++)
if(nwalk_counts_old[i] < nwalk_counts_new[i]) {
counts[i] = (nwalk_counts_new[i]-nwalk_counts_old[i])*sz;
displ[i+1] = displ[i]+counts[i];
} else {
counts[i]=0;
displ[i+1] = displ[i];
}
if(nwalk_counts_old[nproc-1] < nwalk_counts_new[nproc-1])
counts[nproc-1] = (nwalk_counts_new[nproc-1]-nwalk_counts_old[nproc-1])*sz;
else
counts[nproc-1]=0;
if(ncomm < 0)
buffer.resize(sz*std::abs(ncomm));
else
buffer.resize(0);
myComm->scatterv( bufferall, buffer, counts, displ, 0);
} else {
myComm->gatherv( buffer, bufferall, counts, displ, 0);
if(ncomm < 0)
buffer.resize(sz*std::abs(ncomm));
else
buffer.resize(0);
myComm->scatterv( bufferall, buffer, counts, displ, 0);
}
if(ncomm >= 0) return;
ncomm = std::abs(ncomm);
int oldsz = walkers.size();
walkers.resize(oldsz+ncomm);
pos=0;
for(int i=0; i<ncomm; i++) {
walkers[oldsz+i].initWalker(HFMat,true);
walkers[oldsz+i].restartFromChar(buffer.data()+pos);
pos+=sz;
}
/*
if(walkers.size() < nwalk_counts_new[rank]) {
int oldSz = walkers.size();
walkers.resize(nwalk_counts_new[rank]+extra_empty_spaces);
for(int i=oldSz; i<walkers.size(); i++)
walkers[i].initWalker(HFMat,false);
}
cnt=0;
pos=0;
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it) {
if(!it->alive) {
it->restartFromChar(buffer.data()+pos);
pos+=sz;
cnt++;
it->alive=true;
}
if(cnt==ncomm) return;
}
*/
}
// population control algorithm
void LocalWalkerHandler::popControl()
{
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); )
if (!it->alive)
it=walkers.erase(it);
else
++it;
// handle small weights first
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); )
// if below cutoff, flip a coin and kill walker if necessary
// check if this is correct, do I reset the weight to min_weight if successful???
if( it->alive && std::abs(it->weight) < std::abs(min_weight) ) {
if( (int)(distribution(generator) + std::abs(it->weight)/std::abs(reset_weight)) == 0 ) {
it=walkers.erase(it);
} else {
(it++)->weight = reset_weight;
}
} else {
++it;
}
std::vector<Walker> w_;
// now handle large weights
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
if( it->alive && std::abs(it->weight) > std::abs(max_weight) ) {
RealType w = std::abs(it->weight);
int n = (int) (w/std::abs(reset_weight));
RealType rem = w-n*std::abs(reset_weight);
if( ( (int)(distribution(generator) + std::abs(rem/reset_weight) ) ) != 0 ) n++;
it->weight *= reset_weight/w;
for(int i=0; i<n-1; i++) {
w_.push_back(SlaterDetWalker(*it));
}
}
int sz = walkers.size();
walkers.resize(sz+w_.size());
for(int i=0; i<w_.size(); i++)
walkers[sz+i] = w_[i];
}
/*
// population control algorithm
void LocalWalkerHandler::popControl()
{
// handle small weights first
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
// if below cutoff, flip a coin and kill walker if necessary
// check if this is correct, do I reset the weight to min_weight if successful???
if( it->alive && std::abs(it->weight) < std::abs(min_weight) )
if( ( (int)(distribution(generator) + std::abs(it->weight)) ) == 0 ) {
it->weight = RealType(0.0);
//it->weight = ComplexType(0.0);
it->alive = false;
if(emptyWalkers.size() == emptyWalkers.capacity())
emptyWalkers.reserve(static_cast<int>(emptyWalkers.size()*2));
emptyWalkers.push_back(std::distance(walkers.begin(),it));
} else {
// is this correct?????
it->weight = min_weight;
}
// now handle large weights
for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
if( it->alive && std::abs(it->weight) > std::abs(max_weight) ) {
RealType w = std::abs(it->weight);
int n = (int) (w/std::abs(reset_weight));
RealType rem = w-n*std::abs(reset_weight);
if( ( (int)(distribution(generator) + std::abs(rem/reset_weight) ) ) != 0 ) n++;
it->weight *= reset_weight/w;
for(int i=0; i<n-1; i++) {
if(emptyWalkers.size() == 0) {
// hack for now, to guarantee that I'm not here twice in a loop
extra_empty_spaces += n;
resetNumberWalkers(numWalkers(),false,NULL);
extra_empty_spaces -= n;
}
int pos = emptyWalkers.back();
emptyWalkers.pop_back();
walkers[pos] = *it;
}
}
}
*/
}

View File

@ -0,0 +1,274 @@
// -*- C++ -*-
// /**@file LocalWalkerHandler
// * @brief Walker handler that keeps its SlaterDetWalker objects in a local std::vector.
// */
#ifndef QMCPLUSPLUS_AFQMC_WALKERHANDLER_H
#define QMCPLUSPLUS_AFQMC_WALKERHANDLER_H
#include<random>
#include "OhmmsData/libxmldefs.h"
#include "io/hdf_archive.h"
#include"AFQMC/config.h"
#include"AFQMC/Walkers/WalkerHandlerBase.h"
#include<Message/MPIObjectBase.h>
#include "Message/CommOperators.h"
#include "AFQMC/Numerics/DenseMatrixOperations.h"
namespace qmcplusplus
{
/*
* Class that contains and handles walkers.
* Implements communication, load balancing, and I/O operations.
* Walkers are always accessed through the handler.
*/
class LocalWalkerHandler: public WalkerHandlerBase
{
  // Concrete handler that stores SlaterDetWalker objects by value in a
  // std::vector and implements the WalkerHandlerBase interface on top of it.
  // Accessors taking a walker index n do not check that n is in range.

  typedef SlaterDetWalker Walker;
  typedef SlaterDetWalker* WalkerPtr;
  typedef std::vector<SlaterDetWalker>::iterator WalkerIterator;

  public:

  /// constructor
  /// Defaults: min_weight=0.1, max_weight=10, reset_weight=5, extra_empty_spaces=10,
  /// uniform distribution on [0,1). Initialisers are listed in declaration order and
  /// the timer pointer starts out NULL until setup() provides one.
  LocalWalkerHandler(Communicate* c): WalkerHandlerBase(c,std::string("LocalWalkerHandler")),
      distribution(0.0,1.0),
      min_weight(0.1),max_weight(10.0),reset_weight(5.0),
      extra_empty_spaces(10),
      walkerSizeForCommunication(0),walkerSizeForDump(0),
      LocalTimer(NULL)
  { }

  /// destructor
  ~LocalWalkerHandler() {}

  /// number of stored walkers, alive or not
  inline int size() { return walkers.size(); }

  bool restartFromXML();
  bool dumpToXML();

  bool restartFromHDF5(int n, hdf_archive&, const std::string&, bool set_to_target);
  bool dumpToHDF5(hdf_archive&, const std::string&);

  /// not implemented: prints a notice to stderr and returns true
  bool dumpSamplesHDF5(hdf_archive& dump, int nW) {
    std::cerr<<" LocalWalkerHandler:dumpSamplesHDF5() not implemented. Not writing anything. \n";
    return true;
  }

  // reads xml and performs setup
  bool parse(xmlNodePtr cur);

  // performs setup
  bool setup(int a,int b, int c, MPI_Comm, MPI_Comm , MPI_Comm,myTimer*);

  // cleans state of object.
  // -erases allocated memory
  bool clean();

  // called at the beginning of each executable section
  void resetNumberWalkers(int n, bool a=true, ComplexMatrix* S=NULL);

  /// resizes the list to n walkers, initializing new ones from HFMat
  inline bool initWalkers(int n) {
    resetNumberWalkers(n,true,&HFMat);
    // walkers.resize(n);
    // for(int i=0; i<n; i++) walkers[i].initWalker(HFMat);
    return true;
  }

  /// number of alive walkers; all stored walkers when includeDead is true
  inline int numWalkers(bool includeDead=false) {
    int res=0;
    for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
      if(it->alive || includeDead) res++;
    return res;
  }

  /// number of alive walkers with |weight| > 1e-6
  inline int numWalkers2() {
    int res=0;
    for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
      if(it->alive && std::abs(it->weight) > 1e-6 ) res++;
    return res;
  }

  /// local count of alive walkers, reduced over myComm with gsum
  inline int GlobalPopulation() {
    std::vector<int> res(1);
    for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
      if(it->alive) res[0]++;
    myComm->gsum(res);
    return res[0];
  }

  /// local sum of |weight| over alive walkers, reduced over myComm with gsum
  inline RealType GlobalWeight() {
    std::vector<RealType> res(1);
    for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
      if(it->alive) res[0] += std::abs(it->weight);
    myComm->gsum(res);
    return res[0];
  }

  // load balancing algorithm
  void loadBalance();

  // population control algorithm
  void popControl();

  void setHF(const ComplexMatrix& HF);

  /// orthogonalizes walker i: the block starting at SlaterMat.data() (NAEA columns)
  /// and the block starting NAEA*NMO elements later (NAEB columns) are treated separately
  inline void Orthogonalize(int i) {
    DenseMatrixOperators::GeneralizedGramSchmidt(walkers[i].SlaterMat.data(),NAEA,NMO,NAEA);
    DenseMatrixOperators::GeneralizedGramSchmidt(walkers[i].SlaterMat.data()+NAEA*NMO,NAEA,NMO,NAEB);
  }

  /// same as Orthogonalize(int) for every alive walker
  inline void Orthogonalize() {
    for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
      if(it->alive) {
        DenseMatrixOperators::GeneralizedGramSchmidt(it->SlaterMat.data(),NAEA,NMO,NAEA);
        DenseMatrixOperators::GeneralizedGramSchmidt(it->SlaterMat.data()+NAEA*NMO,NAEA,NMO,NAEB);
      }
  }

  // --- per-walker getters ------------------------------------------------
  // Methods without suffix access element 0 of the walker's tuples,
  // methods ending in "2" access element 1.
  ComplexType* getSM(int n) { return (walkers[n].SlaterMat).data(); }
  ComplexType getWeight(int n) { return walkers[n].weight; }
  ComplexType getEloc(int n) { return std::get<0>(walkers[n].eloc); }
  ComplexType getEloc2(int n) { return std::get<1>(walkers[n].eloc); }
  ComplexType getOldEloc(int n) { return std::get<0>(walkers[n].eloc_old); }
  ComplexType getOldEloc2(int n) { return std::get<1>(walkers[n].eloc_old); }
  ComplexType getOvlpAlpha(int n) { return std::get<0>(walkers[n].overlap_alpha); }
  ComplexType getOvlpAlpha2(int n) { return std::get<1>(walkers[n].overlap_alpha); }
  ComplexType getOvlpBeta(int n) { return std::get<0>(walkers[n].overlap_beta); }
  ComplexType getOvlpBeta2(int n) { return std::get<1>(walkers[n].overlap_beta); }
  ComplexType getOldOvlpAlpha(int n) { return std::get<0>(walkers[n].old_overlap_alpha); }
  ComplexType getOldOvlpAlpha2(int n) { return std::get<1>(walkers[n].old_overlap_alpha); }
  ComplexType getOldOvlpBeta(int n) { return std::get<0>(walkers[n].old_overlap_beta); }
  ComplexType getOldOvlpBeta2(int n) { return std::get<1>(walkers[n].old_overlap_beta); }

  // --- per-walker setters ------------------------------------------------
  void setWeight(int n, ComplexType Q) { walkers[n].weight=Q; }
  void setEloc(int n, ComplexType Q) { std::get<0>(walkers[n].eloc) = Q; }
  void setEloc2(int n, ComplexType Q) { std::get<1>(walkers[n].eloc) = Q; }
  void setOldEloc(int n, ComplexType Q) { std::get<0>(walkers[n].eloc_old)=Q; }
  void setOldEloc2(int n, ComplexType Q) { std::get<1>(walkers[n].eloc_old)=Q; }

  /// Q1 -> alpha overlap, Q2 -> beta overlap (element 0)
  void setOvlp(int n, ComplexType Q1, ComplexType Q2) {
    std::get<0>(walkers[n].overlap_alpha) = Q1;
    std::get<0>(walkers[n].overlap_beta) = Q2;
  }
  /// Q1 -> alpha overlap, Q2 -> beta overlap (element 1)
  void setOvlp2(int n, ComplexType Q1, ComplexType Q2) {
    std::get<1>(walkers[n].overlap_alpha) = Q1;
    std::get<1>(walkers[n].overlap_beta) = Q2;
  }
  void setOldOvlp(int n, ComplexType Q1, ComplexType Q2) {
    std::get<0>(walkers[n].old_overlap_alpha) = Q1;
    std::get<0>(walkers[n].old_overlap_beta) = Q2;
  }
  void setOldOvlp2(int n, ComplexType Q1, ComplexType Q2) {
    std::get<1>(walkers[n].old_overlap_alpha) = Q1;
    std::get<1>(walkers[n].old_overlap_beta) = Q2;
  }

  /// copies the current energy and overlaps (element 0) into the "old" slots
  void setCurrToOld(int n) {
    std::get<0>(walkers[n].eloc_old) = std::get<0>(walkers[n].eloc);
    std::get<0>(walkers[n].old_overlap_alpha) = std::get<0>(walkers[n].overlap_alpha);
    std::get<0>(walkers[n].old_overlap_beta) = std::get<0>(walkers[n].overlap_beta);
  }
  /// copies the current energy and overlaps (element 1) into the "old" slots
  void setCurrToOld2(int n) {
    std::get<1>(walkers[n].eloc_old) = std::get<1>(walkers[n].eloc);
    std::get<1>(walkers[n].old_overlap_alpha) = std::get<1>(walkers[n].overlap_alpha);
    std::get<1>(walkers[n].old_overlap_beta) = std::get<1>(walkers[n].overlap_beta);
  }

  // --- bulk setters ------------------------------------------------------
  void setWalker(int n, ComplexType eloc, ComplexType oa, ComplexType ob) {
    std::get<0>(walkers[n].eloc) = eloc;
    std::get<0>(walkers[n].overlap_alpha) = oa;
    std::get<0>(walkers[n].overlap_beta) = ob;
  }
  void setWalker(int n, ComplexType w0, ComplexType eloc, ComplexType oa, ComplexType ob) {
    walkers[n].weight = w0;
    std::get<0>(walkers[n].eloc) = eloc;
    std::get<0>(walkers[n].overlap_alpha) = oa;
    std::get<0>(walkers[n].overlap_beta) = ob;
  }
  void setWalker(int n, ComplexType w0, ComplexType eloc) {
    walkers[n].weight = w0;
    std::get<0>(walkers[n].eloc) = eloc;
  }
  void setWalker2(int n, ComplexType eloc, ComplexType oa, ComplexType ob) {
    std::get<1>(walkers[n].eloc) = eloc;
    std::get<1>(walkers[n].overlap_alpha) = oa;
    std::get<1>(walkers[n].overlap_beta) = ob;
  }

  // --- bulk getters: fill the outputs and return the Slater-matrix data ---
  ComplexType* getWalker(int n, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    eloc = std::get<0>(walkers[n].eloc);
    oa = std::get<0>(walkers[n].overlap_alpha);
    ob = std::get<0>(walkers[n].overlap_beta);
    return (walkers[n].SlaterMat).data();
  }
  ComplexType* getWalker2(int n, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    eloc = std::get<1>(walkers[n].eloc);
    oa = std::get<1>(walkers[n].overlap_alpha);
    ob = std::get<1>(walkers[n].overlap_beta);
    return (walkers[n].SlaterMat).data();
  }
  void getOldWalker(int n, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    eloc = std::get<0>(walkers[n].eloc_old);
    oa = std::get<0>(walkers[n].old_overlap_alpha);
    ob = std::get<0>(walkers[n].old_overlap_beta);
  }
  ComplexType* getWalker(int n, ComplexType& w, ComplexType& eloc, ComplexType& oa, ComplexType& ob) {
    w = walkers[n].weight;
    eloc = std::get<0>(walkers[n].eloc);
    oa = std::get<0>(walkers[n].overlap_alpha);
    ob = std::get<0>(walkers[n].overlap_beta);
    return (walkers[n].SlaterMat).data();
  }

  bool isAlive(int n) {
    return walkers[n].alive;
  }

  /// multiplies the weight of every alive walker by w0
  void scaleWeight(RealType w0) {
    for(WalkerIterator it=walkers.begin(); it!=walkers.end(); ++it)
      if(it->alive) it->weight *= w0;
  }

  //private:

  // using std::random for simplicity now
  std::default_random_engine generator;
  std::uniform_real_distribution<double> distribution;

  // weight thresholds used by popControl (compared through std::abs)
  ComplexType min_weight, max_weight, reset_weight;

  int extra_empty_spaces;

  // matrix used to initialize new walkers
  ComplexMatrix HFMat;

  // memory footprint (in bytes) of a walker for exchange between processors
  int walkerSizeForCommunication;

  // memory footprint (in bytes) of a walker for dump
  int walkerSizeForDump;

  // locations of empty spots in list
  std::vector<int> emptyWalkers;

  // container of walkers (stored by value)
  std::vector<Walker> walkers;

  private:

  inline WalkerIterator begin(int i) { return walkers.begin()+i; }
  inline WalkerIterator begin() { return walkers.begin(); }
  inline WalkerIterator end() { return walkers.end(); }
  inline WalkerPtr getWalkerPtr(size_t i) { return &(walkers[i]); }

  // timer handed in through setup(); NULL until then
  myTimer* LocalTimer;
};
}
#endif

View File

@ -0,0 +1,179 @@
// -*- C++ -*-
// /**@file SlaterDetWalker
// * @brief Walker that stores a Slater matrix with its weight, energies and overlaps.
// */
#ifndef QMCPLUSPLUS_AFQMC_SLATERDETWALKER_H
#define QMCPLUSPLUS_AFQMC_SLATERDETWALKER_H
#include<iostream>
#include <cstdlib>
#include <cstring>
#include"AFQMC/config.h"
namespace qmcplusplus
{
/*
* Walker made of a Slater matrix plus its weight, energies and overlaps.
*/
class SlaterDetWalker: public AFQMCInfo
{
typedef SlaterDetWalker WalkerPtr;
typedef std::vector<WalkerPtr>::iterator WalkerIterator;
public:
/// constructor
SlaterDetWalker():alive(false),init(false) {}
SlaterDetWalker(const SlaterDetWalker& w)
{
init=w.init;
alive=w.alive;
SlaterMat.resize(w.SlaterMat.rows(),w.SlaterMat.cols());
SlaterMat=w.SlaterMat;
weight=w.weight;
energy_full=w.energy_full;
eloc=w.eloc;
eloc_old=w.eloc_old;
overlap_alpha=w.overlap_alpha;
overlap_beta=w.overlap_beta;
old_overlap_alpha=w.old_overlap_alpha;
old_overlap_beta=w.old_overlap_beta;
}
SlaterDetWalker& operator=(const SlaterDetWalker& w)
{
if(this == &w)
return *this;
init=w.init;
alive=w.alive;
SlaterMat.resize(w.SlaterMat.rows(),w.SlaterMat.cols());
SlaterMat=w.SlaterMat;
weight=w.weight;
energy_full=w.energy_full;
eloc=w.eloc;
eloc_old=w.eloc_old;
overlap_alpha=w.overlap_alpha;
overlap_beta=w.overlap_beta;
old_overlap_alpha=w.old_overlap_alpha;
old_overlap_beta=w.old_overlap_beta;
return *this;
}
/// destructor
~SlaterDetWalker() {}
int sizeForDump() { return sizeForComm();}
// do I need to save *_old?
int sizeForComm() {
if(SlaterMat.size() == 0) return -1;
return sizeof(ComplexType)*(14+SlaterMat.size());
}
//bool restartFromXML() {}
//bool dumpToXML() {}
void clear() {}
void unpackFromChar(char *arr, ComplexMatrix& A, ComplexType& w, ComplexType& el, ComplexType& ov )
{
int n=0;
ComplexType o1;
A.resize(SlaterMat.rows(),SlaterMat.cols());
memcpy(&(w), arr ,sizeof(ComplexType)); n += sizeof(ComplexType)*2;
memcpy(&(el), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType)*4;
memcpy(&(o1), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType)*4;
memcpy(&(ov), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType)*4;
memcpy(A.data(), arr+n, A.size()*sizeof(ComplexType));
ov *= o1;
}
void restartFromChar(char *arr) {
int n=0;
memcpy(&(weight), arr ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(energy_full), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<0>(eloc)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<1>(eloc)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<2>(eloc)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<0>(eloc_old)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<0>(overlap_alpha)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<1>(overlap_alpha)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<2>(overlap_alpha)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<0>(old_overlap_alpha)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<0>(overlap_beta)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<1>(overlap_beta)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<2>(overlap_beta)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(&(std::get<0>(old_overlap_beta)), arr+n ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(SlaterMat.data(), arr+n, SlaterMat.size()*sizeof(ComplexType));
}
void dumpToChar(char* arr ) {
int n=0;
memcpy(arr, &(weight) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(energy_full) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<0>(eloc)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<1>(eloc)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<2>(eloc)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<0>(eloc_old)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<0>(overlap_alpha)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<1>(overlap_alpha)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<2>(overlap_alpha)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<0>(old_overlap_alpha)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<0>(overlap_beta)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<1>(overlap_beta)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<2>(overlap_beta)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, &(std::get<0>(old_overlap_beta)) ,sizeof(ComplexType)); n += sizeof(ComplexType);
memcpy(arr+n, SlaterMat.data(), SlaterMat.size()*sizeof(ComplexType));
}
void initWalker(ComplexMatrix& S, bool a=true)
{
init=true;
alive=a;
SlaterMat.resize(S.rows(),S.cols());
SlaterMat=S;
//weight=RealType(1.0);
weight=ComplexType(1.0);
eloc=std::forward_as_tuple(0.0,0.0,0.0);
eloc_old=std::forward_as_tuple(0.0,0.0,0.0);
overlap_alpha=std::forward_as_tuple(0.0,0.0,0.0);
overlap_beta=std::forward_as_tuple(0.0,0.0,0.0);
old_overlap_alpha=std::forward_as_tuple(0.0,0.0,0.0);
old_overlap_beta=std::forward_as_tuple(0.0,0.0,0.0);
}
bool alive;
bool init;
// Storage for alpha and beta Sater matrix
// Alpha from [0,NMO*NMO-1], beta from [NMO*NMO,2*NMO*NMO-1]
ComplexMatrix SlaterMat;
// weight
//RealType weight;
ComplexType weight;
// <phi|H|psi> / <psi|psi>
ComplexType energy_full;
// local energy: (importance sampling, phaseless, estimator)
std::tuple<ComplexType,ComplexType,ComplexType> eloc;
std::tuple<ComplexType,ComplexType,ComplexType> eloc_old;
// overlaps
std::tuple<ComplexType,ComplexType,ComplexType> overlap_alpha;
std::tuple<ComplexType,ComplexType,ComplexType> overlap_beta;
std::tuple<ComplexType,ComplexType,ComplexType> old_overlap_alpha;
std::tuple<ComplexType,ComplexType,ComplexType> old_overlap_beta;
};
}
#endif

View File

@ -0,0 +1,134 @@
// -*- C++ -*-
// /**@file WalkerHandler
// * @brief Virtual Class for walker handlers.
// */
#ifndef QMCPLUSPLUS_AFQMC_WALKERHANDLERBASE_H
#define QMCPLUSPLUS_AFQMC_WALKERHANDLERBASE_H
#include "OhmmsData/libxmldefs.h"
#include "io/hdf_archive.h"
#include"AFQMC/config.h"
#include <Message/MPIObjectBase.h>
//#include "Message/CommOperators.h"
namespace qmcplusplus
{
/*
* Class that contains and handles walkers.
* Implements communication, load balancing, and I/O operations.
* Walkers are always accessed through the handler.
*/
class WalkerHandlerBase: public MPIObjectBase, public AFQMCInfo
{
public:
/// constructor
WalkerHandlerBase(Communicate* c, std::string type=""): MPIObjectBase(c),name("")
,load_balance_alg("all"),core_rank(0),ncores_per_TG(1),walkerType(type)
{ }
/// destructor
~WalkerHandlerBase() {}
virtual int size()=0;
virtual bool restartFromHDF5(int n, hdf_archive&, const std::string&, bool set_to_target)=0;
virtual bool dumpToHDF5(hdf_archive&, const std::string&)=0;
virtual bool dumpSamplesHDF5(hdf_archive& dump, int nW)=0;
// reads xml and performs setup
virtual bool parse(xmlNodePtr cur)=0;
// performs setup
virtual bool setup(int a,int b,int c,MPI_Comm, MPI_Comm comm, MPI_Comm,myTimer*)=0;
// cleans state of object.
// -erases allocated memory
virtual bool clean()=0;
// called at the beginning of each executable section
virtual void resetNumberWalkers(int n, bool a=true, ComplexMatrix* S=NULL)=0;
virtual bool initWalkers(int n)=0;
virtual int numWalkers(bool dummy=false)=0;
virtual int numWalkers2() { return 0; }
virtual int GlobalPopulation()=0;
virtual RealType GlobalWeight()=0;
// load balancing algorithm
virtual void loadBalance()=0;
// population control algorithm
virtual void popControl()=0;
virtual void setHF(const ComplexMatrix& HF)=0;
virtual void Orthogonalize(int i)=0;
virtual void Orthogonalize()=0;
virtual ComplexType* getSM(int n)=0;
virtual ComplexType getWeight(int n)=0;
virtual ComplexType getEloc(int n)=0;
virtual ComplexType getEloc2(int n)=0;
virtual ComplexType getOldEloc(int n)=0;
virtual ComplexType getOldEloc2(int n)=0;
virtual ComplexType getOvlpAlpha(int n)=0;
virtual ComplexType getOvlpAlpha2(int n)=0;
virtual ComplexType getOvlpBeta(int n)=0;
virtual ComplexType getOvlpBeta2(int n)=0;
virtual ComplexType getOldOvlpAlpha(int n)=0;
virtual ComplexType getOldOvlpAlpha2(int n)=0;
virtual ComplexType getOldOvlpBeta(int n)=0;
virtual ComplexType getOldOvlpBeta2(int n)=0;
virtual void setWeight(int n, ComplexType Q)=0;
virtual void setEloc(int n, ComplexType Q)=0;
virtual void setEloc2(int n, ComplexType Q)=0;
virtual void setOldEloc(int n, ComplexType Q)=0;
virtual void setOldEloc2(int n, ComplexType Q)=0;
virtual void setOvlp(int n, ComplexType Q1, ComplexType Q2)=0;
virtual void setOvlp2(int n, ComplexType Q1, ComplexType Q2)=0;
virtual void setOldOvlp(int n, ComplexType Q1, ComplexType Q2)=0;
virtual void setOldOvlp2(int n, ComplexType Q1, ComplexType Q2)=0;
virtual void setCurrToOld(int n)=0;
virtual void setCurrToOld2(int n)=0;
virtual bool isAlive(int n)=0;
virtual void scaleWeight(RealType w0)=0;
virtual void setWalker(int,ComplexType,ComplexType)=0;
virtual void setWalker(int,ComplexType,ComplexType,ComplexType)=0;
virtual void setWalker(int,ComplexType,ComplexType,ComplexType,ComplexType)=0;
virtual void setWalker2(int,ComplexType,ComplexType,ComplexType)=0;
virtual ComplexType* getWalker(int,ComplexType&,ComplexType&,ComplexType&)=0;
virtual ComplexType* getWalker2(int,ComplexType&,ComplexType&,ComplexType&)=0;
virtual ComplexType* getWalker(int,ComplexType&,ComplexType&,ComplexType&,ComplexType&)=0;
virtual void getOldWalker(int,ComplexType&,ComplexType&,ComplexType&)=0;
// name of the object
std::string name;
// type of walker
std::string walkerType;
int nwalk_global, nwalk_min, nwalk_max;
std::vector<int> nwalk_counts_old, nwalk_counts_new;
// type of load balancing
std::string load_balance_alg;
int core_rank;
int ncores_per_TG;
};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,223 @@
#ifndef QMCPLUSPLUS_AFQMC_GENERALSINGLEDETERMINANT_H
#define QMCPLUSPLUS_AFQMC_GENERALSINGLEDETERMINANT_H
#include <vector>
#include <map>
#include <string>
#include <iostream>
#include <tuple>
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "AFQMC/Wavefunctions/WavefunctionBase.h"
#include "AFQMC/Hamiltonians/SparseGeneralHamiltonian.h"
namespace qmcplusplus
{
/*
 * Class that implements a pure single determinant trial wave-function.
 * General means that the Slater matrix is composed of 0,1 only,
 * meaning that it represents an eigenstate of the mean-field
 * solution used to construct the hamiltonian.
 *
 * NOTE(review): this description is word-for-word the one used for
 * PureSingleDeterminant; confirm that it is accurate for the general case.
 */
class GeneralSingleDeterminant: public WavefunctionBase
{

  typedef WavefunctionBase* WfnPtr;
  typedef GeneralSingleDeterminant ThisWfn;
  typedef GeneralSingleDeterminant* ThisWfnPtr;
  typedef HamiltonianBase* HamPtr;
  typedef std::vector<IndexType>::iterator VIndexit;
  typedef std::vector<s1D<ValueType> >::iterator s1Dit;
  typedef std::vector<s2D<ValueType> >::iterator s2Dit;
  typedef std::vector<s4D<ValueType> >::iterator s4Dit;

  public:

  // Initializers are written in member declaration order (the order in which
  // they run). The raw hamiltonian pointer starts out null instead of
  // indeterminate.
  GeneralSingleDeterminant(Communicate *c):WavefunctionBase(c),
      cutoff(1e-5),trialDensityMatrix_needsupdate(true),wfntype(0),uhf_walker(true),
      sHam(NULL)
  {}

  ~GeneralSingleDeterminant() {}

  bool setup(HamPtr cur);

  bool parse(xmlNodePtr );

  // Initializes the wavefunction from the source selected by filetype/init_type.
  //   - filetype "none"/"" or init_type "diagH1"/"ground": setup_local()
  //   - filetype "fcidump"/"ascii"/"sqc_ascii": initFromAscii(filename)
  //   - filetype "xml": initFromXML(filename)
  //   - filetype "hdf5": opens filename (on head_of_nodes only) and reads tag hdf_read_tag
  //   - anything else: reads from the archive/tag supplied by the caller
  // Returns the result of the selected initializer; HDF5 failures abort.
  bool init(hdf_archive& read, const std::string& tag)
  {
    if(filetype == "none" || filetype == "" || init_type=="diagH1" || init_type=="ground")
      return setup_local();

    if(filetype == "fcidump" || filetype == "ascii" || filetype == "sqc_ascii")
      return initFromAscii(filename);

    if(filetype == "xml")
      return initFromXML(filename);

    if(filetype == "hdf5") {
      hdf_archive readF(myComm);
      // only the head of each node opens (and later closes) the file
      if(head_of_nodes) {
        if(!readF.open(filename,H5F_ACC_RDONLY,false)) {
          APP_ABORT(" Problems reading hdf5 file in WavefunctionBase::init()");
        }
      }
      if(!initFromHDF5(readF,hdf_read_tag)) {
        app_error()<<" Problems reading hdf5 file in WavefunctionBase::init()";
        APP_ABORT(" Problems reading hdf5 file in WavefunctionBase::init()");
        return false;
      }
      readHamFromFile=true;
      if(head_of_nodes) readF.close();
      return true;
    }

    // Every other filetype falls back to the caller-supplied archive. The
    // former trailing "Could not find a wavefunction initialization type"
    // branch could never be reached and has been dropped.
    if(!initFromHDF5(read,tag)) {
      app_error()<<" Problems reading restart file in WavefunctionBase::init()";
      APP_ABORT(" Problems reading hdf5 file in WavefunctionBase::init()");
      return false;
    }
    readHamFromFile=true;
    return true;
  }

  //bool hdf_write(hdf_archive& read, const std::string& tag, bool include_tensors=true);
  bool hdf_write();

  // The next three are intentionally empty for this wavefunction type.
  void evaluateMeanFields() {}

  void evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexMatrix& G) {}

  void evaluateTwoBodyMixedDensityMatrix() {}

  void evaluateLocalEnergy(const ComplexType* , ComplexType& , ComplexType&, ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1 );

  void evaluateOverlap(const ComplexType* , ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1 );

  void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1);
  void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1);

  void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);
  void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);

  void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int ik0, int ikN, int pik0, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);
  void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int ik0, int ikN, int pik0, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);

  void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1 );
  void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1 );

  //private:

  bool setup_local();

  bool initFromAscii(std::string fileName);

  bool initFromHDF5(hdf_archive&,const std::string&);

  // XML initialization is not implemented. Report the failure explicitly:
  // the previous empty body flowed off the end of a bool function, which is
  // undefined behaviour and is reachable from init() when filetype == "xml".
  bool initFromXML(std::string fileName)
  {
    app_error()<<" GeneralSingleDeterminant::initFromXML is not implemented. \n";
    return false;
  }

  bool getHamiltonian(HamPtr );

  // evaluates and stores mixed density matrix in mixed_density_matrix
  // this evaluates the mixed density matrix in reduced format (only NAEX*NMO non-zero sector)
  void local_evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexType& ovl_alpha, ComplexType& ovl_beta);

  void local_evaluateOneBodyTrialDensityMatrix();

  ValueType NuclearCoulombEnergy;

  RealType cutoff;

  // vector that contains the list of orbitals occupied in the given Slater Matrix
  // Indexes are 0-based.
  // std::vector<IndexType> occup_alpha;
  // std::vector<IndexType> occup_beta;
  // std::vector<IndexType> virtual_alpha;
  // std::vector<IndexType> virtual_beta;

  // alternative storage for occupied states for easy access/lookup
  // each MO (from 0...NMO-1) is mapped to either true/false based on occupation
  // std::map<IndexType,bool> isOcc_alpha;
  // std::map<IndexType,bool> isOcc_beta;

  // 1RDM of the trial density matrix.
  // Used to calculate mean-field energy and mean-field potentials
  // This is a permutation of the identity matrix, do we need to store it???
  bool trialDensityMatrix_needsupdate;
  ComplexMatrix trial_density_matrix;

  int wfntype;
  bool uhf_walker;

  // Local storage
  // Notice that all these matrices are NMOxNAEX, since the rest of the NMO-NAEX columns are zero.
  // Care must be taken when returning results to other objects in the code.
  ComplexMatrix overlap_inv;
  ComplexMatrix mixed_density_matrix;

  // temporary storage
  ComplexMatrix S0,S1,SS0,T0, SM;
  ComplexVector V0;
  std::vector<ComplexType> Cwork;
  std::vector<int> pivot;

  // Slater matrices for the trial wavefunction
  ComplexMatrix OrbMat;

  // One-Body Hamiltonian. Stored in sparse form.
  std::vector<s1D<ValueType> > hij;

  // Storage for two body hamiltonian
  // Tensor is stored in sparse form.
  ComplexSpMat SpHijkl;
  ComplexSMSpMat SMSpHijkl;

  // pointer to hamiltonian object (null until assigned)
  SparseGeneralHamiltonian* sHam;

/*
// This is only for debugging and setup.
// Should never be used inside execution loop
ValueType H(IndexType I, IndexType J) {
if( I < NMO && J < NMO ) {
IndexType indx = I*NMO+J;
// use binary search later
for(s1Dit it = hij.begin(); it<hij.end(); it++)
if(std::get<0>(*it) == indx) return std::get<1>(*it);
} else if( I >= NMO && J >= NMO ) {
IndexType indx = I*NMO+J-NMO;
// use binary search later
for(s1Dit it = hij.begin(); it<hij.end(); it++)
if(std::get<0>(*it) == indx) return std::get<1>(*it);
}
return static_cast<ValueType>(0.0);
}
// This is only for debugging and setup.
// Should never be used inside execution loop
ValueType H(IndexType I, IndexType J, IndexType K, IndexType L) {
#ifdef AFQMC_DEBUG
// probably a good idea to check that (I,K) and (J,L) belong to same spin
#endif
ValueType scl = static_cast<ValueType>(1.0);
IndexType indx1 = (I<NMO)?(I*NMO+K):(I*NMO+K-NMO);
IndexType indx2 = (J<NMO)?(J*NMO+L):(J*NMO+L-NMO);
// only do this is you eliminate redundant pairs from list by *2.0
//if( !(I==J && K==L) && ((I<NMO && J<NMO) || (I>=NMO && J>=NMO)) ) scl *= static_cast<ValueType>(0.5);
if( !(I==J && K==L) ) scl *= static_cast<ValueType>(0.5);
for(s2Dit it = Vijkl.begin(); it<Vijkl.end(); it++)
if( (std::get<0>(*it) == indx1 && std::get<1>(*it) == indx2) ||
(std::get<0>(*it) == indx2 && std::get<1>(*it) == indx1) ) return std::get<2>(*it)*scl;
return static_cast<ValueType>(0.0);
}
*/

  // Maps the orbital pair (I,J) to a linear index: I*NMO+J when J<NMO,
  // otherwise I*NMO+J-NMO.
  inline IndexType Index2Mat(IndexType I, IndexType J) {
    return (J<NMO)?(I*NMO+J):(I*NMO+J-NMO);
  }

};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,264 @@
#ifndef QMCPLUSPLUS_AFQMC_MULTIPURESINGLEDETERMINANT_H
#define QMCPLUSPLUS_AFQMC_MULTIPURESINGLEDETERMINANT_H
#include <vector>
#include <map>
#include <string>
#include <iostream>
#include <tuple>
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "AFQMC/Wavefunctions/WavefunctionBase.h"
#include "AFQMC/Hamiltonians/SparseGeneralHamiltonian.h"
namespace qmcplusplus
{
/*
 * Class that implements a linear combination of
 * pure single determinant trial wave-function.
 * Pure means that the Slater matrix is composed of 0,1 only,
 * meaning that it represents an eigenstate of the mean-field
 * solution used to construct the hamiltonian.
 * Many tricks are used to minimize computation and storage
 * including sparse evaluation and low-rank updates
 */
class MultiPureSingleDeterminant: public WavefunctionBase
{

  typedef WavefunctionBase* WfnPtr;
  typedef MultiPureSingleDeterminant ThisWfn;
  typedef MultiPureSingleDeterminant* ThisWfnPtr;
  typedef HamiltonianBase* HamPtr;
  typedef std::vector<IndexType>::iterator VIndexit;
  typedef std::vector<s1D<ValueType> >::iterator s1Dit;
  typedef std::vector<s2D<ValueType> >::iterator s2Dit;
  typedef std::vector<s4D<ValueType> >::iterator s4Dit;

  public:

  // Initializers are written in member declaration order (the order in which
  // they run). Scalars and the raw pointer that were previously left
  // indeterminate (IterCI_maxit, IterCI_cut, orbsize, sHam) now start at zero/null.
  MultiPureSingleDeterminant(Communicate *c):WavefunctionBase(c),
      cutoff(1e-5),iterCI(false),IterCI_maxit(0),IterCI_cut(0.0),
      diagHam(true),rotated_hamiltonian(false),wfntype(0),orbsize(0),
      diag_in_steps(0),runtype(0),max_excitation(0),ref(0),
      trialDensityMatrix_needsupdate(true),sHam(NULL)
  {}

  ~MultiPureSingleDeterminant() {}

  bool parse(xmlNodePtr);

  // Setup consists solely of fetching the hamiltonian.
  bool setup (HamPtr cur) {
    return getHamiltonian(cur);
  }

  bool hdf_write(hdf_archive& read, const std::string& tag, bool include_tensors=true);
  bool hdf_write();

  // intentionally empty for this wavefunction type
  void evaluateMeanFields() {}

  void evaluateTrialEnergy(ComplexType& ke, ComplexType& pe);

  // intentionally empty for this wavefunction type
  void evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexMatrix& G) {}

  // intentionally empty for this wavefunction type
  void evaluateTwoBodyMixedDensityMatrix() {}

  void evaluateLocalEnergy(const ComplexType* , ComplexType& , ComplexType&, ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1 );

  void evaluateOverlap(const ComplexType* , ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1 );

  void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1 );
  void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1 );

  void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);
  void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);

  void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int ik0, int ikN, int pik0, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);
  void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int ik0, int ikN, int pik0, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);

  void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1 );
  void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1 );

  ComplexType local_evaluateOverlapSlaterDet(int detn, const ComplexType* SlaterMat);

  int cntExcitations(std::vector<IndexType>&,std::vector<IndexType>&,IndexType&,IndexType&,IndexType&,IndexType&,std::vector<IndexType>&,RealType&);

  // very simple algorithm to generate a ci expansion in orthogonal space iteratively
  void iterativeCI(double cutoff, int nmax, int nmax_int, int maxit);

  // Sanity check of occ_orbs, which stores NAEA alpha entries followed by
  // NAEB beta entries for each of the ci.size() determinants.
  //   - alpha entries must lie in [0,NMO)
  //   - beta entries must lie in [NMO,2*NMO)
  // Returns true when everything is consistent (always true for a rotated
  // hamiltonian); otherwise reports the offending determinant and returns false.
  bool check_occ_orbs() {
    if(rotated_hamiltonian) return true;
    int ne=NAEA+NAEB,nci = ci.size();
    // ne<=0 is rejected up front so the division below cannot divide by zero
    if(ne <= 0 || occ_orbs.size()/ne != nci) {
      app_error()<<" Error in check_occ_orbs: nci != orbs.size(): " <<nci <<" " <<occ_orbs.size() <<std::endl;
      return false;
    }
    std::vector<IndexType>::iterator it = occ_orbs.begin();
    for(int i=0; i<nci; i++) {
      // alpha sector
      for(int k=0; k<NAEA; k++,it++)
        if( *it < 0 || *it >= NMO ) {
          // reported on the error stream, consistent with the beta sector below
          app_error()<<" Error in check_occ_orbs: det, occ: " <<i <<" ";
          for(int j=0; j<ne; j++) app_error()<<occ_orbs[i*ne+j] <<" ";
          app_error()<<std::endl;
          return false;
        }
      // beta sector
      for(int k=0; k<NAEB; k++,it++)
        if( *it < NMO || *it >= 2*NMO ) {
          app_error()<<" Error in check_occ_orbs: det, occ: " <<i <<" ";
          for(int j=0; j<ne; j++) app_error()<<occ_orbs[i*ne+j] <<" ";
          app_error()<<std::endl;
          return false;
        }
    }
    return true;
  }

  private:

  RealType cutoff;

  bool iterCI;
  int IterCI_maxit;
  double IterCI_cut;

  bool initFromAscii(std::string fileName);

  // HDF5 initialization is not implemented. Report the failure explicitly:
  // the previous empty body flowed off the end of a bool function, which is
  // undefined behaviour.
  bool initFromHDF5(hdf_archive&,const std::string&)
  {
    app_error()<<" MultiPureSingleDeterminant::initFromHDF5 is not implemented. \n";
    return false;
  }

  // XML initialization is not implemented (same reasoning as initFromHDF5).
  bool initFromXML(std::string fileName)
  {
    app_error()<<" MultiPureSingleDeterminant::initFromXML is not implemented. \n";
    return false;
  }

  bool getHamiltonian(HamPtr );

  // evaluates and stores mixed density matrix in mixed_density_matrix
  // this evaluates the mixed density matrix in reduced format (only NAEX*NMO non-zero sector)
  void local_evaluateOneBodyMixedDensityMatrix(int, const ComplexType* SlaterMat, ComplexType& ovl_alpha, ComplexType& ovl_beta, ComplexMatrix&, bool full=false);

  void local_evaluateOneBodyMixedDensityMatrixFull(const ComplexType* SlaterMat, ComplexType& ovl, ComplexMatrix&, bool full=false);

  void local_rankUpdateOneBodyMixedDensityMatrix(const int ndet, const ComplexType* SlaterMat, ComplexType& ovl_alpha, ComplexType& ovl_beta, bool full=false);

  void local_evaluateOneBodyTrialDensityMatrix(bool full=false);

  void local_rankUpdateOneBodyTrialDensityMatrix(int n, bool full=false);

  bool diagonalizeTrialWavefunction(std::vector<RealType>& eigVal, ComplexMatrix& eigVec, std::vector<IndexType>& occ, int nci, bool eigV=true);

  bool diagHam;

  ValueType NuclearCoulombEnergy;

  // for every determinant,
  // vector that contains the list of orbitals occupied in the given Slater Matrix
  // Indexes are 0-based.
  // Only used for setup/initialization, NOT FOR EXECUTION!!!!
  //std::vector<vector<IndexType> > occup_alpha;
  //std::vector<vector<IndexType> > occup_beta;
  //std::vector<vector<IndexType> > virtual_alpha;
  //std::vector<vector<IndexType> > virtual_beta;

  // alternative storage for occupied states for easy access/lookup
  // each MO (from 0...NMO-1) is mapped to either true/false based on occupation
  //std::vector<map<IndexType,bool> > isOcc_alpha;
  //std::vector<map<IndexType,bool> > isOcc_beta;

  // for testing purposes only!!!
  ComplexMatrix StoreSM;

  bool rotated_hamiltonian;
  int wfntype;
  std::vector<ComplexType> OrbMat;
  int orbsize;

  int diag_in_steps;

  bool runtype;
  // 0: single copy of hamiltonian with extra computational cost
  // 1: full storage of all hamiltonians (only option for rotated_hamiltonian)

  int max_excitation;
  IndexType ref;

  // determinant coefficients
  std::vector<ComplexType> ci;

  // stores differences wrt to reference determinant
  // stored continuously, tricky to access so careful
  std::vector<IndexType> ijab;

  // used to determine the number of excitations and their location in the list
  std::vector<s2D<IndexType> > excitation_bounds;

  std::vector<IndexType> occ_orbs;
  std::vector<IndexType> occ_pairs;

  // storage for results
  std::vector<ComplexType> overlaps, pe, ke;

  // 1RDM of the trial density matrix.
  // Used to calculate mean-field energy and mean-field potentials
  // This is a permutation of the identity matrix, do we need to store it???
  bool trialDensityMatrix_needsupdate;
  // this will always mean the reference determinant trial_density_matrix
  ComplexMatrix trial_density_matrix;
  ComplexMatrix rank_updated_trial_density_matrix;

  // Local storage
  // Notice that all these matrices are NMOxNAEX, since the rest of the NMO-NAEX columns are zero.
  // Care must be taken when returning results to other objects in the code.
  ComplexMatrix overlap_inv;
  // this will always mean the reference determinant mixed_density_matrix
  ComplexMatrix full_mixed_density_matrix;
  ComplexMatrix mixed_density_matrix;
  ComplexMatrix rank_updated_mixed_density_matrix;

  // temporary storage
  ComplexMatrix S0,S1,SS0;
  ComplexVector V0;
  std::vector<IndexType> Iwork;
  std::vector<ComplexType> Cwork;
  std::vector<int> pivot;

  // One-Body Hamiltonian. Stored in sparse form.
  // Contains all possible terms used by all determinants
  // Terms are sorted to allow systematic access
  std::vector<std::vector<s1D<ValueType> > > hij;
  std::vector<std::vector<s1D<ComplexType> > > haj;
  // list of pointers defining the beginning and end
  // of the elements with first index i
  std::vector<s1Dit > hij_indx;

  // Storage for two body hamiltonian
  // Tensor is stored in sparse form.
  // Contains all possible terms used by all determinants
  // Terms are sorted to allow systematic access
  std::vector<s2D<ValueType> > Vijkl;
  std::vector<ComplexSMSpMat> SMSpHijkl;
  // list of pointers defining the beginning and end
  // of the elements with first index i
  std::vector<s2Dit > Vijkl_indx;
  std::vector<int> Vijkl_nterms_per_det;

  // pointer to hamiltonian object (null until assigned)
  SparseGeneralHamiltonian* sHam;

  // Maps the orbital pair (I,J) to a linear index: I*NMO+J when J<NMO,
  // otherwise I*NMO+J-NMO.
  inline IndexType Index2Mat(IndexType I, IndexType J) {
    return (J<NMO)?(I*NMO+J):(I*NMO+J-NMO);
  }

  // Splits a linear index IJ back into (I,J): indices below NMO*NMO decode to
  // I,J in [0,NMO); larger ones decode to I,J offset by NMO.
  inline void Mat2Index(const IndexType IJ, IndexType& I, IndexType& J) {
    if( IJ < NMO*NMO) {
      I = IJ/NMO;
      J = IJ%NMO;
    } else {
      I = (IJ-NMO*NMO)/NMO+NMO;
      J = (IJ-NMO*NMO)%NMO+NMO;
    }
  }

};
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,236 @@
#ifndef QMCPLUSPLUS_AFQMC_PURESINGLEDETERMINANT_H
#define QMCPLUSPLUS_AFQMC_PURESINGLEDETERMINANT_H
#include <vector>
#include <map>
#include <string>
#include <iostream>
#include <tuple>
#include "AFQMC/config.h"
#include <Message/MPIObjectBase.h>
#include "AFQMC/Wavefunctions/WavefunctionBase.h"
#include "AFQMC/Hamiltonians/SparseGeneralHamiltonian.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
namespace qmcplusplus
{
/*
 * Class that implements a pure single determinant trial wave-function.
 * Pure means that the Slater matrix is composed of 0,1 only,
 * meaning that it represents an eigenstate of the mean-field
 * solution used to construct the hamiltonian.
 *
 */
class PureSingleDeterminant: public WavefunctionBase
{

  typedef WavefunctionBase* WfnPtr;
  typedef PureSingleDeterminant ThisWfn;
  typedef PureSingleDeterminant* ThisWfnPtr;
  typedef HamiltonianBase* HamPtr;
  typedef std::vector<IndexType>::iterator VIndexit;
  typedef std::vector<s1D<ValueType> >::iterator s1Dit;
  typedef std::vector<s2D<ValueType> >::iterator s2Dit;
  typedef std::vector<s4D<ValueType> >::iterator s4Dit;

  public:

  // Initializers are written in member declaration order (the order in which
  // they run). The raw hamiltonian pointer starts out null instead of
  // indeterminate.
  PureSingleDeterminant(Communicate *c):WavefunctionBase(c),
      cutoff(1e-5),wfntype(0),rotated_hamiltonian(false),
      setup_vn_occ_indx(true),trialDensityMatrix_needsupdate(true),
      sHam(NULL)
  {}

  ~PureSingleDeterminant() {}

  bool setup(HamPtr cur);

  bool parse(xmlNodePtr );

  //bool hdf_write(hdf_archive& read, const std::string& tag, bool include_tensors=true);
  bool hdf_write();

  // Number of entries returned for distributed propagation: 2 plus the size
  // of the Green's function block, which depends on closed_shell and
  // rotated_hamiltonian. Braces make the if/else pairing explicit.
  int sizeOfInfoForDistributedPropagation()
  {
    if(closed_shell) {
      if(rotated_hamiltonian)
        return 2+NMO*NMO;   // green function becomes dense in evaluation of vbias with rotated_hamiltonian
      return 2+NAEA*NMO;
    }
    if(rotated_hamiltonian)
      return 2+2*NMO*NMO;
    return 2+(NAEA+NAEB)*NMO;
  }

  // The next three are intentionally empty for this wavefunction type.
  void evaluateMeanFields() {}

  void evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexMatrix& G) {}

  void evaluateTwoBodyMixedDensityMatrix() {}

  void evaluateLocalEnergy(const ComplexType* , ComplexType& , ComplexType&, ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1 );

  void dist_evaluateLocalEnergy(WalkerHandlerBase* wset , bool first, const int n=-1 );

  void evaluateLocalEnergy(bool addBetaBeta, RealType dt, const ComplexType* , const ComplexSMSpMat&, ComplexType& , ComplexType&, ComplexType& ovl_alpha, ComplexType& ovl_beta, bool, const int n=-1 );

  void evaluateOverlap(const ComplexType* , ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1 );

  void dist_evaluateOverlap(WalkerHandlerBase* wset, bool first, const int n=-1 );

  void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1);
  void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1);

  void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);
  void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);

  void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int i0, int iN, int pi0, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);
  void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int i0, int iN, int pi0, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1);

  void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1 );
  void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1 );

  void evaluateOneBodyMixedDensityMatrix(WalkerHandlerBase* wset, ComplexSMVector* buf, int wlksz, int gfoffset, bool full=true);

  // True when i is a key of isOcc_alpha. Only key presence is tested;
  // the mapped bool is not consulted.
  bool isOccupAlpha( int i) {
    return (isOcc_alpha.find(i) != isOcc_alpha.end());
  }

  // True when i is a key of isOcc_beta. Only key presence is tested;
  // the mapped bool is not consulted.
  bool isOccupBeta( int i) {
    return (isOcc_beta.find(i) != isOcc_beta.end());
  }

  //private:

  bool setup_local();

  bool initFromAscii(std::string fileName);

  bool initFromHDF5(hdf_archive&,const std::string&);

  // XML initialization is not implemented. Report the failure explicitly:
  // the previous empty body flowed off the end of a bool function, which is
  // undefined behaviour.
  bool initFromXML(std::string fileName)
  {
    app_error()<<" PureSingleDeterminant::initFromXML is not implemented. \n";
    return false;
  }

  bool getHamiltonian(HamPtr );

  // evaluates and stores mixed density matrix in mixed_density_matrix
  // this evaluates the mixed density matrix in reduced format (only NAEX*NMO non-zero sector)
  void local_evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexType& ovl_alpha, ComplexType& ovl_beta, bool full=true);
  void local_evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexType& ovl_alpha, ComplexType& ovl_beta, ComplexType* dm, bool full=true);

  void split_Ham_rows(IndexType, ComplexSMSpMat::int_iterator, IndexType&, IndexType&);

  void local_evaluateOneBodyTrialDensityMatrix(bool full=true);

  ValueType NuclearCoulombEnergy;

  RealType cutoff;

  int wfntype;
  bool rotated_hamiltonian;
  ComplexMatrix OrbMat;
  std::vector<ComplexType> T1;

  bool setup_vn_occ_indx; // currently a hack
  std::vector<std::tuple<int,int,int> > vn_occ_ik;
  std::vector<std::tuple<int,int,int> > vn_occ_lj;

  // vector that contains the list of orbitals occupied in the given Slater Matrix
  // Indexes are 0-based.
  std::vector<IndexType> occup_alpha;
  std::vector<IndexType> occup_beta;
  // std::vector<IndexType> virtual_alpha;
  // std::vector<IndexType> virtual_beta;

  // alternative storage for occupied states for easy access/lookup
  // each MO (from 0...NMO-1) is mapped to either true/false based on occupation
  std::map<IndexType,bool> isOcc_alpha;
  std::map<IndexType,bool> isOcc_beta;

  // 1RDM of the trial density matrix.
  // Used to calculate mean-field energy and mean-field potentials
  // This is a permutation of the identity matrix, do we need to store it???
  bool trialDensityMatrix_needsupdate;
  ComplexMatrix trial_density_matrix;

  // Local storage
  // Notice that all these matrices are NMOxNAEX, since the rest of the NMO-NAEX columns are zero.
  // Care must be taken when returning results to other objects in the code.
  ComplexMatrix overlap_inv;
  ComplexMatrix mixed_density_matrix;

  std::vector<ComplexType> local_buff;

  // temporary storage
  ComplexMatrix S0,S1,SS0;
  ComplexVector V0;

  // One-Body Hamiltonian. Stored in sparse form.
  std::vector<s1D<ValueType> > hij;
  std::vector<s1D<ComplexType> > haj;

  // Storage for two body hamiltonian
  // Tensor is stored in sparse form.
  ComplexSpMat SpHijkl;
  ComplexSMSpMat SMSpHijkl;

  // ik breakup of Spvn
  IndexType ik0, ikN;   // minimum and maximum values of ik index in Spvn
  IndexType pik0;       // locations of bounds of ik0 sector in Spvn

  // pointer to hamiltonian object (null until assigned)
  SparseGeneralHamiltonian* sHam;

  std::vector<IndexType> Iwork;
  std::vector<ComplexType> Cwork;
  std::vector<int> pivot;

/*
// This is only for debugging and setup.
// Should never be used inside execution loop
ValueType H(IndexType I, IndexType J) {
if( I < NMO && J < NMO ) {
IndexType indx = I*NMO+J;
// use binary search later
for(s1Dit it = hij.begin(); it<hij.end(); it++)
if(std::get<0>(*it) == indx) return std::get<1>(*it);
} else if( I >= NMO && J >= NMO ) {
IndexType indx = I*NMO+J-NMO;
// use binary search later
for(s1Dit it = hij.begin(); it<hij.end(); it++)
if(std::get<0>(*it) == indx) return std::get<1>(*it);
}
return static_cast<ValueType>(0.0);
}
// This is only for debugging and setup.
// Should never be used inside execution loop
ValueType H(IndexType I, IndexType J, IndexType K, IndexType L) {
#ifdef AFQMC_DEBUG
// probably a good idea to check that (I,K) and (J,L) belong to same spin
#endif
ValueType scl = static_cast<ValueType>(1.0);
IndexType indx1 = (I<NMO)?(I*NMO+K):(I*NMO+K-NMO);
IndexType indx2 = (J<NMO)?(J*NMO+L):(J*NMO+L-NMO);
// only do this is you eliminate redundant pairs from list by *2.0
//if( !(I==J && K==L) && ((I<NMO && J<NMO) || (I>=NMO && J>=NMO)) ) scl *= static_cast<ValueType>(0.5);
if( !(I==J && K==L) ) scl *= static_cast<ValueType>(0.5);
for(s2Dit it = Vijkl.begin(); it<Vijkl.end(); it++)
if( (std::get<0>(*it) == indx1 && std::get<1>(*it) == indx2) ||
(std::get<0>(*it) == indx2 && std::get<1>(*it) == indx1) ) return std::get<2>(*it)*scl;
return static_cast<ValueType>(0.0);
}
*/

  // Maps the orbital pair (I,J) to a linear index: I*NMO+J when J<NMO,
  // otherwise I*NMO+J-NMO.
  inline IndexType Index2Mat(IndexType I, IndexType J) {
    return (J<NMO)?(I*NMO+J):(I*NMO+J-NMO);
  }

};
}
#endif

View File

@ -0,0 +1,258 @@
#ifndef QMCPLUSPLUS_AFQMC_WAVEFUNCTIONBASE_H
#define QMCPLUSPLUS_AFQMC_WAVEFUNCTIONBASE_H
#include "AFQMC/config.h"
#include<Message/MPIObjectBase.h>
#include "AFQMC/Hamiltonians/HamiltonianBase.h"
#include "io/hdf_archive.h"
#include "AFQMC/Utilities/taskgroup.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
namespace qmcplusplus
{
// Eventually make this a template to handle walker types
class WavefunctionBase: public MPIObjectBase, public AFQMCInfo
{
typedef WavefunctionBase* WfnPtr;
typedef HamiltonianBase* HamPtr;
public:
// Builds the base wavefunction state on communicator c: empty file names,
// tags and role, all flags off, ik range unset (-1), and a task group named
// "WavefunctionTG" configured for one core and one node.
WavefunctionBase(Communicate *c)
  : MPIObjectBase(c),
    readHamFromFile(false),
    hdf_write_file(""),
    hdf_read_tag(""),
    hdf_write_tag(""),
    wfn_role(""),
    closed_shell(false),
    filetype(""),
    TG(c,"WavefunctionTG"),
    distribute_Ham(false),
    min_ik(-1),
    max_ik(-1),
    ncores_per_TG(1),
    core_rank(0),
    nnodes_per_TG(1),
    parallel(false)
{}
// Declared virtual explicitly: this class has virtual member functions and is
// handled through WavefunctionBase pointers (see the WfnPtr typedef), so
// destruction through the base must be well defined.
virtual ~WavefunctionBase() {}
// Stores the communicator in MPI_COMM_HEAD_OF_NODES and records in
// head_of_nodes the flag hd supplied by the caller.
void setHeadComm(bool hd, MPI_Comm comm)
{
  MPI_COMM_HEAD_OF_NODES = comm;
  head_of_nodes = hd;
}
virtual bool init(std::vector<int>& TGdata, ComplexSMVector *v, hdf_archive& read, const std::string& tag, MPI_Comm tg_comm, MPI_Comm node_comm)
{
// setup TG
ncores_per_TG=TGdata[4];
if(nnodes_per_TG > 1) distribute_Ham=true;
if(nnodes_per_TG > 1 || ncores_per_TG >1) parallel=true;
if(!TG.quick_setup(ncores_per_TG,nnodes_per_TG,TGdata[0],TGdata[1],TGdata[2],TGdata[3]))
return false;
TG.setBuffer(v);
core_rank = TG.getCoreRank();
TG.setNodeCommLocal(node_comm);
TG.setTGCommLocal(tg_comm);
// setup WFN
if(filetype == "none" && init_type=="ground")
return setup_local();
else if(filetype == "fcidump" || filetype == "ascii" || filetype == "sqc_ascii")
return initFromAscii(filename);
else if(filetype == "xml")
return initFromXML(filename);
else if(filetype == "hdf5") {
hdf_archive readF(myComm);
if(head_of_nodes)
if(!readF.open(filename,H5F_ACC_RDONLY,false))
APP_ABORT(" Problems reading hdf5 file in WavefunctionBase::init()");
if(!initFromHDF5(readF,hdf_read_tag)) {
app_error()<<" Problems reading hdf5 file in WavefunctionBase::init()";
APP_ABORT(" Problems reading hdf5 file in WavefunctionBase::init()");
return false;
}
readHamFromFile=true;
if(head_of_nodes) readF.close();
return true;
} else {
if(!initFromHDF5(read,tag)) {
app_error()<<" Problems reading restart file in WavefunctionBase::init()";
APP_ABORT(" Problems reading hdf5 file in WavefunctionBase::init()");
return false;
}
readHamFromFile=true;
return true;
}
app_error()<<" Could not find a wavefunction initialization type. \n";
return false;
}
// Returns the closed_shell flag.
bool isClosedShell() {return closed_shell;}
//virtual bool hdf_write(hdf_archive& read, const std::string& tag, bool include_tensors=true)=0;
// Pure virtual interface that every concrete wavefunction must implement.
virtual bool hdf_write()=0;
virtual bool setup(HamPtr)=0;
virtual bool parse(xmlNodePtr)=0;
// Returns a mutable reference to the HF matrix member.
ComplexMatrix& getHF() { return HF; }
// Default implementation for wavefunction types that do not support
// distributed propagation: aborts with an error message.
virtual int sizeOfInfoForDistributedPropagation()
{
  APP_ABORT("WavefunctionBase::sizeOfInfoForDistributedPropagation() not implemented for this wavefunction type. \n");
  // Not expected to be reached. Keeps every path of this int function
  // returning a value, since flowing off the end would be undefined behaviour.
  return 0;
}
// Pure virtual; one overload per sparse-matrix type (ComplexSpMat and
// ComplexSMSpMat). Results are written through v; n defaults to -1.
virtual void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1 )=0;
virtual void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1 )=0;
// Non-virtual dispatcher (derived classes do not need to reimplement it):
// forwards to the distributed implementation when the `parallel` flag is set
// and to the serial one otherwise.
void evaluateLocalEnergy(WalkerHandlerBase* wset, bool first , const int n=-1)
{
  if(!parallel) {
    serial_evaluateLocalEnergy(wset,first,n);
    return;
  }
  dist_evaluateLocalEnergy(wset,first,n);
}
// Serial walker loop (derived classes do not need to reimplement it).
// For every walker that is alive and whose weight magnitude exceeds 1e-6, the
// single-walker evaluateLocalEnergy overload is called on its Slater matrix.
//   first == true : results are stored with setWalker(i, ekin+epot, ovlp_a, ovlp_b)
//   first == false: results are stored with setEloc2 / setOvlp2
void serial_evaluateLocalEnergy(WalkerHandlerBase* wset, bool first, const int n=-1)
{
  const int nwalk = wset->numWalkers(true);
  ComplexType ekin,epot,ovlp_a,ovlp_b;
  for(int iw=0; iw<nwalk; ++iw) {
    // skip dead walkers and walkers with negligible weight
    const bool skip = !wset->isAlive(iw) || std::abs(wset->getWeight(iw)) <= 1e-6;
    if(skip) continue;
    evaluateLocalEnergy(wset->getSM(iw),ekin,epot,ovlp_a,ovlp_b,n);
    if(!first) {
      wset->setEloc2(iw,ekin+epot);
      wset->setOvlp2(iw,ovlp_a,ovlp_b);
      continue;
    }
    wset->setWalker(iw,ekin+epot,ovlp_a,ovlp_b);
  }
}
// Distributed variant used by evaluateLocalEnergy(wset,...) when `parallel`
// is set. The default only aborts; override in types that support it.
virtual void dist_evaluateLocalEnergy(WalkerHandlerBase* wset, bool first, const int n=-1)
{ APP_ABORT("WavefunctionBase::dist_evaluateLocalEnergy not implemented for this wavefunction type. \n"); }
// Single-walker local energy (pure virtual). Outputs are written through
// ekin, epot, ovl_alpha and ovl_beta.
virtual void evaluateLocalEnergy(const ComplexType* SlaterMat, ComplexType& ekin, ComplexType& epot, ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1)=0;
// Single-walker local energy taking the factorized Hamiltonian Spvn.
// The default only aborts; override in types that support it.
virtual void evaluateLocalEnergy(bool addBetaBeta, RealType dt, const ComplexType* SlaterMat, const ComplexSMSpMat& Spvn, ComplexType& ekin, ComplexType& epot, ComplexType& ovl_alpha, ComplexType& ovl_beta, bool transposed, const int n=-1)
{ APP_ABORT("WavefunctionBase::evaluateLocalEnergy with factorized H not implemented for this wavefunction type. \n"); }
// Non-virtual dispatcher (derived classes do not need to reimplement it):
// forwards to the distributed implementation when the `parallel` flag is set
// and to the serial one otherwise.
void evaluateOverlap(WalkerHandlerBase* wset, bool first, const int n=-1)
{
  if(!parallel) {
    serial_evaluateOverlap(wset,first,n);
    return;
  }
  dist_evaluateOverlap(wset,first,n);
}
// Distributed variant used by evaluateOverlap(wset,...) when `parallel` is
// set. The default only aborts; override in types that support it.
virtual void dist_evaluateOverlap(WalkerHandlerBase* wset, bool first, const int n=-1)
{ APP_ABORT("WavefunctionBase::dist_evaluateOverlap not implemented for this wavefunction type. \n"); }
// Serial walker loop (derived classes do not need to reimplement it).
// For every walker that is alive and whose weight magnitude exceeds 1e-6, the
// single-walker evaluateOverlap overload is called on its Slater matrix.
//   first == true : overlaps are stored with setOvlp
//   first == false: overlaps are stored with setOvlp2
void serial_evaluateOverlap(WalkerHandlerBase* wset, bool first, const int n=-1)
{
  const int nwalk = wset->numWalkers(true);
  ComplexType ovlp_a,ovlp_b;
  for(int iw=0; iw<nwalk; ++iw) {
    // skip dead walkers and walkers with negligible weight
    const bool skip = !wset->isAlive(iw) || std::abs(wset->getWeight(iw)) <= 1e-6;
    if(skip) continue;
    evaluateOverlap(wset->getSM(iw),ovlp_a,ovlp_b,n);
    if(!first) {
      wset->setOvlp2(iw,ovlp_a,ovlp_b);
      continue;
    }
    wset->setOvlp(iw,ovlp_a,ovlp_b);
  }
}
// Single-walker overlap (pure virtual). Outputs are written through
// ovl_alpha and ovl_beta.
virtual void evaluateOverlap(const ComplexType* SlaterMat, ComplexType& ovl_alpha, ComplexType& ovl_beta, const int n=-1)=0;
// Walker-set version of the one-body mixed density matrix. The default only
// aborts; override in types that support it.
virtual void evaluateOneBodyMixedDensityMatrix(WalkerHandlerBase* wset, ComplexSMVector* buf, int wlksz, int gfoffset, bool full=true) {
APP_ABORT(" Error: evaluateOneBodyMixedDensityMatrix not implemented for this wave-function. \n\n\n");
}
// Remaining interface is pure virtual. Each operator routine comes in two
// overloads, one per sparse-matrix storage type (ComplexSpMat / ComplexSMSpMat).
virtual void evaluateOneBodyMixedDensityMatrix(const ComplexType* SlaterMat, ComplexMatrix& G)=0;
virtual void evaluateTwoBodyMixedDensityMatrix()=0;
virtual void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1)=0;
virtual void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1)=0;
virtual void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int ik0, int ikN, int pik0, ComplexSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1)=0;
virtual void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const ComplexType* buff, int ik0, int ikN, int pik0, ComplexSMSpMat&, std::vector<ComplexType>& v, bool transposed, bool needsG, const int n=-1)=0;
virtual void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSpMat&, std::vector<ComplexType>& v, const int n=-1 )=0;
virtual void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const ComplexType* SlaterMat, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, ComplexSMSpMat&, std::vector<ComplexType>& v, const int n=-1 )=0;
// Public identification strings. WavefunctionHandler::parse appends
// "ImpSamp" / "Estimator" to wfn_role and WavefunctionHandler::addWfn
// compares `name` against the XML id.
std::string name;
std::string wfn_role;
// Defaults that accept everything; derived classes may override.
virtual bool check_occ_orbs() {return true; }
virtual bool isOccupAlpha( int i) { return true; }
virtual bool isOccupBeta(int i) { return true; }
// Currently a no-op: the assignment to commBuff is commented out.
void setCommBuffer(ComplexSMVector& bf)
{
//commBuff = bf;
}
protected:
// Called by init() when filetype == "none" and init_type == "ground".
// The default aborts; override in types that allow this initialization.
virtual bool setup_local() {
APP_ABORT(" Error: type=none not allowed for this wavefunction type. \n");
return false;
}
TaskGroup TG;
bool distribute_Ham;
// selects dist_* versus serial_* in the evaluate* dispatchers
bool parallel;
int min_ik, max_ik;
int core_rank,ncores_per_TG;
int nnodes_per_TG;
std::string filename;
std::string filetype;
std::string init_type;
std::string hdf_write_file;
std::string hdf_read_tag;
std::string hdf_write_tag;
bool closed_shell;
ComplexMatrix HF;
bool readHamFromFile;
bool head_of_nodes;
MPI_Comm MPI_COMM_HEAD_OF_NODES;
// Format-specific initializers selected by init() according to filetype.
virtual bool initFromAscii(std::string fileName)=0;
virtual bool initFromXML(std::string fileName)=0;
virtual bool initFromHDF5(hdf_archive&,const std::string&)=0;
virtual bool getHamiltonian(HamPtr )=0;
// Used to identify the current step.
// The main purpose of this is to tell the different
// wavefunction objects whether we are in the same step
// or not. This will allow us to reuse information already
// calculated in a previous section of the current step,
// e.g. not to recalculate density matrices if we are redoing
// a local energy calculation on the same step.
// Especially useful for multideterminant calculations.
int time_stamp;
// Hamiltonian object associated with these wavefunctions.
// The hamiltonian gives access to matrix elements,
// but trial wavefunction related quantities are calculated
// in this class.
// Make sure that there is consistency between the type of hamiltonian
// and the type of wavefunction object, e.g. sparse versus full matrix.
HamPtr ham0;
};
}
#endif

View File

@ -0,0 +1,106 @@
#include "OhmmsData/libxmldefs.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "AFQMC/config.h"
#include<Message/MPIObjectBase.h>
#include "AFQMC/Wavefunctions/WavefunctionHandler.h"
#include "AFQMC/Wavefunctions/WavefunctionBase.h"
#include "AFQMC/Wavefunctions/PureSingleDeterminant.h"
#include "AFQMC/Wavefunctions/GeneralSingleDeterminant.h"
#include "AFQMC/Wavefunctions/MultiPureSingleDeterminant.h"
namespace qmcplusplus
{
// Reads the handler's XML node.
//   attributes: "name" -> name, "new_algo" -> new_algo (set only when "yes")
//   children  : <ImpSamp> and <Estimator> are passed to addWfn(); the returned
//               wavefunction gets the matching tag appended to its wfn_role.
// Returns false only when cur is NULL.
bool WavefunctionHandler::parse(xmlNodePtr cur)
{
  if(cur == NULL)
    return false;
  std::string algoFlag("no");
  OhmmsAttributeSet attribs;
  attribs.add(name,"name");
  attribs.add(algoFlag,"new_algo");
  attribs.put(cur);
  if(algoFlag=="yes") new_algo=true;
  for(xmlNodePtr child = cur->children; child != NULL; child = child->next) {
    const std::string tag((const char*)(child->name));
    if(tag == "ImpSamp") {
      ImpSampWfn = addWfn(child);
      ImpSampWfn->wfn_role += std::string("ImpSamp");
    } else if(tag == "Estimator") {
      EstimatorWfn = addWfn(child);
      EstimatorWfn->wfn_role += std::string("Estimator");
      app_log()<<" Successfully created Estimator wfn: " <<EstimatorWfn->name <<" " <<EstimatorWfn->wfn_role <<std::endl;
    }
  }
  return true;
}
// Creates (or reuses) the wavefunction object described by an XML node.
//   cur: node carrying the attributes "type" and "name"/"id".
// If a wavefunction with the same id is already stored in wfns it is returned
// unchanged. Otherwise a new object of the requested type ("PureSD", "GenSD"
// or "MultiPureSD") is allocated, parsed, given the head-of-nodes
// communicator and appended to wfns.
// Returns NULL when cur is NULL; aborts on an unrecognized type.
WavefunctionHandler::WfnPtr WavefunctionHandler::addWfn(xmlNodePtr cur)
{
  if(cur==NULL) return NULL;
  std::string id("");
  std::string type("");
  OhmmsAttributeSet aAttrib;
  aAttrib.add(type,"type");
  // both "name" and "id" feed the same identifier
  aAttrib.add(id,"name");
  aAttrib.add(id,"id");
  aAttrib.put(cur);
  // look for object with same id
  for(int i=0; i<wfns.size(); i++)
    if(id == wfns[i]->name) return wfns[i];
  WfnPtr it = NULL;
  if(type == "PureSD")
    it = (WavefunctionBase*) new PureSingleDeterminant(myComm);
  else if(type == "GenSD")
    it = (WavefunctionBase*) new GeneralSingleDeterminant(myComm);
  else if(type == "MultiPureSD")
    it = (WavefunctionBase*) new MultiPureSingleDeterminant(myComm);
  else {
    // Previously `it` stayed uninitialized here and was dereferenced below.
    app_error()<<" Error: Unknown wavefunction type in WavefunctionHandler::addWfn(): " <<type <<std::endl;
    APP_ABORT(" Error: Unknown wavefunction type in WavefunctionHandler::addWfn(). \n");
    return NULL;
  }
  it->parse(cur);
  it->setHeadComm(head_of_nodes,MPI_COMM_HEAD_OF_NODES);
  wfns.push_back(it);
  return it;
}
bool WavefunctionHandler::init(std::vector<int>& TGdata, ComplexSMVector *v, hdf_archive& read, const std::string& tag, MPI_Comm tg_comm, MPI_Comm node_comm)
{
for(int i=0; i<wfns.size(); i++) {
wfns[i]->copyInfo(*this);
if(!wfns[i]->init(TGdata,v,read,tag,tg_comm,node_comm))
return false;
}
return true;
}
// Calls setup(ham) on every stored wavefunction, in order.
// Stops at, and returns false on, the first failure; true otherwise.
bool WavefunctionHandler::setup(HamPtr ham)
{
  for(WfnPtr wfn : wfns) {
    if(wfn->setup(ham))
      continue;
    return false;
  }
  return true;
}
} // qmcplusplus

View File

@ -0,0 +1,389 @@
#ifndef QMCPLUSPLUS_AFQMC_WAVEFUNCTIONHANDLER_H
#define QMCPLUSPLUS_AFQMC_WAVEFUNCTIONHANDLER_H
#include "OhmmsData/libxmldefs.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include "io/hdf_archive.h"
#include "AFQMC/config.h"
#include<Message/MPIObjectBase.h>
#include "AFQMC/Wavefunctions/WavefunctionBase.h"
#include "AFQMC/Wavefunctions/PureSingleDeterminant.h"
#include "AFQMC/Walkers/WalkerHandlerBase.h"
//#include "AFQMC/Walkers/SlaterDetWalker.h"
namespace qmcplusplus
{
// Eventually make this a template to handle walker types
class WavefunctionHandler: public MPIObjectBase, public AFQMCInfo
{
typedef WavefunctionBase* WfnPtr;
typedef HamiltonianBase* HamPtr;
public:
// Builds an empty handler bound to communicator c.
// Initializers are listed in member declaration order (the order in which
// C++ actually initializes them). head_of_nodes and time_stamp are now given
// defined values: head_of_nodes is read by addWfn() and was previously
// indeterminate unless setHeadComm() had been called first.
// NOTE(review): MPI_COMM_HEAD_OF_NODES is still only set by setHeadComm().
WavefunctionHandler(Communicate *c):MPIObjectBase(c),name(""),core_rank(0),ncores_per_TG(1),head_of_nodes(false),distribute_Ham(false),new_algo(false),time_stamp(0),ham0(NULL),phaselessWfn(NULL),ImpSampWfn(NULL),EstimatorWfn(NULL)
{
  // room for a handful of wavefunctions without reallocation
  wfns.reserve(10);
}
// NOTE(review): addWfn() allocates the entries of wfns with new and nothing
// here deletes them — confirm whether this is intentional.
~WavefunctionHandler() {}
// Defined in WavefunctionHandler.cpp.
bool parse(xmlNodePtr cur);
// Forwards to the importance-sampling wavefunction; ImpSampWfn is not
// checked for NULL here.
ComplexMatrix& getHF() {
return ImpSampWfn->getHF();
}
// Stores the head-of-nodes flag and communicator; addWfn() passes both on to
// each wavefunction it creates.
void setHeadComm(bool hd, MPI_Comm comm) {
head_of_nodes=hd;
MPI_COMM_HEAD_OF_NODES = comm;
}
// Defined in WavefunctionHandler.cpp.
bool init(std::vector<int>& TGdata, ComplexSMVector *v,hdf_archive&,const std::string&, MPI_Comm, MPI_Comm);
bool setup(HamPtr);
WfnPtr addWfn(xmlNodePtr cur);
// Currently a no-op.
void evaluateMeanFields() {}
// Reports the closed-shell flag of the wavefunction selected by `type`
// ("ImportanceSampling" or "Estimator"). Aborts when the type is unknown or
// when "Estimator" is requested but no estimator wavefunction exists.
bool isClosedShell(const std::string& type) {
  if(type == "ImportanceSampling")
    return ImpSampWfn->isClosedShell();
  if(type != "Estimator") {
    APP_ABORT("Unknown wavefunction type in isClosedShell(). \n");
    return false;
  }
  if(EstimatorWfn == NULL) {
    APP_ABORT("Undefined wavefunction in isClosedShell('Estimator') \n\n\n");
    return false;
  }
  return EstimatorWfn->isClosedShell();
}
// Forwards sizeOfInfoForDistributedPropagation() to the wavefunction selected
// by `type` ("ImportanceSampling" or "Estimator"). Aborts when the type is
// unknown or when "Estimator" is requested but no estimator wavefunction exists.
inline int sizeOfInfoForDistributedPropagation(const std::string& type) {
  if(type == "ImportanceSampling")
    return ImpSampWfn->sizeOfInfoForDistributedPropagation();
  if(type != "Estimator") {
    APP_ABORT("Unknown wavefunction type in sizeOfInfoForDistributedPropagation(). \n");
    return 0;
  }
  if(EstimatorWfn == NULL) {
    APP_ABORT("Undefined wavefunction in sizeOfInfoForDistributedPropagation('Estimator') \n\n\n");
    return 0;
  }
  return EstimatorWfn->sizeOfInfoForDistributedPropagation();
}
// Walker-set local energy and overlap for the wavefunction selected by `type`.
//   "ImportanceSampling": ImpSampWfn->evaluateLocalEnergy(wset,true,n)
//   "Estimator"         : EstimatorWfn->evaluateLocalEnergy(wset,false,n);
//                         without an estimator wavefunction every walker's
//                         second local energy is set to zero via setEloc2.
// Any other type aborts.
inline void evaluateLocalEnergyAndOverlap(const std::string& type, const int n, WalkerHandlerBase* wset)
{
  if(type == "ImportanceSampling") {
    ImpSampWfn->evaluateLocalEnergy(wset,true,n);
    return;
  }
  if(type != "Estimator") {
    APP_ABORT("Unknown wavefunction type in evaluateLocalEnergyAndOverlap(wset). \n");
    return;
  }
  if(EstimatorWfn != NULL) {
    EstimatorWfn->evaluateLocalEnergy(wset,false,n);
    return;
  }
  // no estimator wavefunction: zero the secondary local energies
  const int nwalk = wset->numWalkers(true);
  for(int iw=0; iw<nwalk; ++iw)
    wset->setEloc2(iw,ComplexType(0,0));
}
inline void evaluateLocalEnergyAndOverlap(const std::string& type, const int n, ComplexType* SM, ComplexType& eloc, ComplexType& ovlp_a, ComplexType& ovlp_b)
{
ComplexType ekin,epot;
if(type == std::string("ImportanceSampling")) {
ImpSampWfn->evaluateLocalEnergy(SM,ekin,epot,ovlp_a,ovlp_b,n);
eloc = ekin+epot;
} else if(type == std::string("Estimator")) {
if(EstimatorWfn!=NULL) {
EstimatorWfn->evaluateLocalEnergy(SM,ekin,epot,ovlp_a,ovlp_b,n);
eloc = ekin+epot;
} else {
eloc = 0;
}
} else {
APP_ABORT("Unknown wavefunction type in evaluateLocalEnergyAndOverlap(SM). \n");
}
}
inline void evaluateLocalEnergyAndOverlap(bool addBetaBeta, const std::string& type, const int n, ComplexType* SM, ComplexType& eloc, ComplexType& ovlp_a, ComplexType& ovlp_b, const ComplexSMSpMat& Spvn, bool transposed, RealType dt)
{
ComplexType ekin,epot;
if(type == std::string("ImportanceSampling")) {
ImpSampWfn->evaluateLocalEnergy(addBetaBeta,dt,SM,Spvn,ekin,epot,ovlp_a,ovlp_b,transposed,n);
eloc = ekin+epot;
} else if(type == std::string("Estimator")) {
if(EstimatorWfn!=NULL) {
EstimatorWfn->evaluateLocalEnergy(addBetaBeta,dt,SM,Spvn,ekin,epot,ovlp_a,ovlp_b,transposed,n);
eloc = ekin+epot;
} else {
eloc = 0;
}
} else {
APP_ABORT("Unknown wavefunction type in evaluateLocalEnergyAndOverlap(Spvn). \n");
}
}
// Walker-set overlap for the wavefunction selected by `type`.
//   "ImportanceSampling": ImpSampWfn->evaluateOverlap(wset,true,n)
//   "Estimator"         : EstimatorWfn->evaluateOverlap(wset,false,n), or
//                         nothing at all when no estimator wavefunction exists.
// Any other type aborts.
inline void evaluateOverlap(const std::string& type, const int n, WalkerHandlerBase* wset)
{
  if(type == "ImportanceSampling") {
    ImpSampWfn->evaluateOverlap(wset,true,n);
    return;
  }
  if(type != "Estimator") {
    APP_ABORT("Unknown wavefunction type in evaluateOverlap(wset). \n");
    return;
  }
  if(EstimatorWfn == NULL)
    return;
  EstimatorWfn->evaluateOverlap(wset,false,n);
}
inline void evaluateOverlap(const std::string& type, const int n, ComplexType* SM, ComplexType& ovlp_a, ComplexType& ovlp_b)
{
if(type == std::string("ImportanceSampling")) {
ImpSampWfn->evaluateOverlap(SM,ovlp_a,ovlp_b,n);
} else if(type == std::string("Estimator")) {
EstimatorWfn->evaluateOverlap(SM,ovlp_a,ovlp_b,n);
} else {
APP_ABORT("Unknown wavefunction type in evaluateOverlap(SM). \n");
}
}
inline void evaluateOneBodyMixedDensityMatrix(const std::string& type, WalkerHandlerBase* wset, ComplexSMVector* buf, int wlksz, int gfoffset, bool full=true)
{
if(type == std::string("ImportanceSampling")) {
ImpSampWfn->evaluateOneBodyMixedDensityMatrix(wset,buf,wlksz,gfoffset,full);
} else if(type == std::string("Estimator")) {
EstimatorWfn->evaluateOneBodyMixedDensityMatrix(wset,buf,wlksz,gfoffset,full);
} else {
APP_ABORT("Unknown wavefunction type in evaluateOneBodyMixedDensityMatrix. \n");
}
}
// ComplexType evaluateLocalEnergy(const std::string& type, const int n, ComplexMatrix& SD) {}
// ComplexType evaluateOverlap(const std::string& type, const int n, ComplexMatrix& SD) {}
template<class T>
void calculateMixedMatrixElementOfOneBodyOperators(bool addBetaBeta, const std::string& type, const int n, ComplexType* SM, T& Spvn , std::vector<ComplexType>& v, bool transposed , bool needsG) {
if(type == std::string("ImportanceSampling")) {
ImpSampWfn->calculateMixedMatrixElementOfOneBodyOperators(addBetaBeta,SM,Spvn,v,transposed,needsG,n);
} else if(type == std::string("Estimator")) {
EstimatorWfn->calculateMixedMatrixElementOfOneBodyOperators(addBetaBeta,SM,Spvn,v,transposed,needsG,n);
} else {
APP_ABORT("Unknown wavefunction type in calculateMixedMatrixElementOfOneBodyOperators. \n");
}
}
template<class T>
void calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(bool addBetaBeta, const std::string& type, const int n, ComplexType* buff, int ik0, int ikN, int pik0, T& Spvn , std::vector<ComplexType>& v, bool transposed , bool needsG) {
if(type == std::string("ImportanceSampling")) {
ImpSampWfn->calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(addBetaBeta,buff,ik0,ikN,pik0,Spvn,v,transposed,needsG,n);
} else if(type == std::string("Estimator")) {
EstimatorWfn->calculateMixedMatrixElementOfOneBodyOperatorsFromBuffer(addBetaBeta,buff,ik0,ikN,pik0,Spvn,v,transposed,needsG,n);
} else {
APP_ABORT("Unknown wavefunction type in calculateMixedMatrixElementOfOneBodyOperators. \n");
}
}
// Forwards to the importance-sampling wavefunction's
// calculateMixedMatrixElementOfTwoBodyOperators. Only the type
// "ImportanceSampling" is accepted; anything else aborts.
template<class T>
void calculateMixedMatrixElementOfTwoBodyOperators(bool addBetaBeta, const std::string& type, const int n, ComplexType* SM, const std::vector<s4D<ComplexType> >& vn, const std::vector<IndexType>& vn_indx, T&Spvn, std::vector<ComplexType>& v ) {
  if(type != "ImportanceSampling") {
    APP_ABORT("Unknown wavefunction type in calculateMixedMatrixElementOfTwoBodyOperators. \n");
    return;
  }
  ImpSampWfn->calculateMixedMatrixElementOfTwoBodyOperators(addBetaBeta,SM,vn,vn_indx,Spvn,v,n);
}
// Forwards to the importance-sampling wavefunction's
// calculateMeanFieldMatrixElementOfOneBodyOperators. Only the type
// "ImportanceSampling" is accepted; anything else aborts.
template<class T>
void calculateMeanFieldMatrixElementOfOneBodyOperators(bool addBetaBeta, const std::string& type, const int n, T& Spvn, std::vector<ComplexType>& v ) {
  if(type != "ImportanceSampling") {
    APP_ABORT("Unknown wavefunction type in calculateMeanFieldMatrixElementOfOneBodyOperators. \n");
    return;
  }
  ImpSampWfn->calculateMeanFieldMatrixElementOfOneBodyOperators(addBetaBeta,Spvn,v,n);
}
// Forwards isOccupAlpha(i) to the wavefunction selected by `type`
// ("ImportanceSampling" or "Estimator"). Aborts when the type is unknown or
// when "Estimator" is requested but no estimator wavefunction exists.
bool isOccupAlpha( const std::string& type, int i) {
  if(type == std::string("ImportanceSampling")) {
    return ImpSampWfn->isOccupAlpha(i);
  } else if(type == std::string("Estimator")) {
    if(EstimatorWfn!=NULL) {
      return EstimatorWfn->isOccupAlpha(i);
    } else {
      APP_ABORT("Error: Attempting to access uninitialized Estimator wavefunction \n");
    }
  } else {
    APP_ABORT("Unknown wavefunction type in isOccupAlpha. \n");
  }
  // The abort paths used to fall off the end of a non-void function.
  return false;
}
// Forwards isOccupBeta(i) to the wavefunction selected by `type`
// ("ImportanceSampling" or "Estimator"). Aborts when the type is unknown or
// when "Estimator" is requested but no estimator wavefunction exists.
bool isOccupBeta( const std::string& type, int i) {
  if(type == std::string("ImportanceSampling")) {
    return ImpSampWfn->isOccupBeta(i);
  } else if(type == std::string("Estimator")) {
    if(EstimatorWfn!=NULL) {
      return EstimatorWfn->isOccupBeta(i);
    } else {
      APP_ABORT("Error: Attempting to access uninitialized Estimator wavefunction \n");
    }
  } else {
    APP_ABORT("Unknown wavefunction type in isOccupBeta. \n");
  }
  // The abort paths used to fall off the end of a non-void function.
  return false;
}
// Tells whether the wavefunction selected by `type` has been created.
//   "ImportanceSampling" -> ImpSampWfn   is non-NULL
//   "Estimator"          -> EstimatorWfn is non-NULL
// Any other type yields false.
bool check_initialized(const std::string& type)
{
  if(type == std::string("ImportanceSampling")) {
    // the `return` was missing, so this branch always reported false
    return ImpSampWfn!=NULL;
  } else if(type == std::string("Estimator")) {
    return EstimatorWfn!=NULL;
  }
  return false;
}
// Currently a no-op: the forwarding loop over wfns is commented out.
void setCommBuffer(std::vector<ComplexType>& bf)
{
//for(int i=0; i<wfns.size(); i++)
//wfns[i]->setCommBuffer(bf);
}
// Forwards to the importance-sampling wavefunction; ImpSampWfn is not
// checked for NULL here.
bool check_occ_orbs() {
return ImpSampWfn->check_occ_orbs();
}
// Handler name, read from the "name" XML attribute in parse().
std::string name;
int core_rank,ncores_per_TG;
// Set through setHeadComm(); handed to every wavefunction created by addWfn().
bool head_of_nodes;
MPI_Comm MPI_COMM_HEAD_OF_NODES;
bool distribute_Ham; // implement assuming factorized Ham first
ComplexVector local_energy;
// Set to true by parse() when the "new_algo" attribute is "yes".
bool new_algo;
// Used to identify the current step.
// The main purpose of this is to tell the different
// wavefunction objects whether we are in the same step
// or not. This will allow us to reuse information already
// calculated in a previous section of the current step,
// e.g. not to recalculate density matrices if we are redoing
// a local energy calculation on the same step.
// Especially useful for multideterminant calculations.
int time_stamp;
// Hamiltonian object associated with these wavefunctions.
// The hamiltonian gives access to matrix elements,
// but trial wavefunction related quantities are calculated
// in this class.
// Make sure that there is consistency between the type of hamiltonian
// and the type of wavefunction object, e.g. sparse versus full matrix.
HamPtr ham0;
// Role pointers; they point to entries of wfns (see parse()/addWfn()).
WfnPtr phaselessWfn;
WfnPtr ImpSampWfn;
WfnPtr EstimatorWfn;
// Stores pointers to WavefunctionBase objects owned by this object.
// This is necessary in case wfns are repeated.
std::vector<WfnPtr> wfns;
};
}
#endif

View File

@ -0,0 +1,108 @@
#include<iostream>
#include<fstream>
#include<string>
#include<cstdlib>
#include<cassert>
#include<complex>
#include<map>
#include "AFQMC/config.0.h"
#include "AFQMC/Wavefunctions/WavefunctionHelper.h"
// Helper functions for slater determinant routines
namespace qmcplusplus
{
// taking from FCIQMC code. I'm sure there's a nicer way to do this using STL
int cmpDets(int NAEA, int NAEB, int* n, double &sg, std::vector<IndexType>::iterator sdet1, std::vector<IndexType>::iterator sdet2, std::vector<IndexType>& work )
{
sg=0.0;
int cnt=0,pos=0,ind[20],cnt2=0,nq=0;
bool found;
IndexType dummy = 30000;
for(int i=0; i<4; i++) n[i]=-1;
work.resize(NAEA+NAEB);
std::copy(sdet2,sdet2+NAEA+NAEB,work.begin());
for(int i=0; i<NAEA; i++) {
found=false;
for(int j=0; j<NAEA; j++)
if(*(sdet1+i) == *(sdet2+j)) {
found = true;
work[j]=dummy;
break;
}
if(!found) {
if(cnt<2) ind[cnt]=i;
cnt++;
if(cnt > 2) {
sg=0.0;
return 2*cnt;
}
}
}
for(int i=NAEA; i<NAEA+NAEB; i++) {
found=false;
for(int j=NAEA; j<NAEA+NAEB; j++)
if(*(sdet1+i) == *(sdet2+j)) {
found = true;
work[j]=dummy;
break;
}
if(!found) {
if(cnt<2) ind[cnt]=i;
cnt++;
if(cnt > 2) {
sg=0.0;
return 2*cnt;
}
}
}
if(cnt == 1) {
n[1]=static_cast<int>( *(sdet1+ind[0]) );
for(int i=0; i<NAEA+NAEB; i++) {
if(work[i] != dummy) { // there should be only one
nq = ind[0]-i;
n[0]=static_cast<int>(work[i]);
break;
}
}
sg = nq%2==0?1.0:-1.0;
} else if(cnt == 2) {
int iq1=-1,iq2=-1;
n[2]=static_cast<int>( *(sdet1+ind[0]) );
n[3]=static_cast<int>( *(sdet1+ind[1]) );
for(int i=0; i<NAEA+NAEB; i++)
if(work[i] != dummy) { // there should be only one
n[0]=static_cast<int>(work[i]);
iq1=i;
break;
}
for(int i=iq1+1; i<NAEA+NAEB; i++)
if(work[i] != dummy) { // there should be only one
n[1]=static_cast<int>(work[i]);
iq2=i;
break;
}
if(iq1 < 0 || iq2 < 0) {
std::cout<<"Problems in cmpDet: \n"
<<"det1: ";
for(int i=0; i<NAEA+NAEB; i++) std::cout<<*(sdet1+i) <<" ";
std::cout<<"\ndet2: ";
for(int i=0; i<NAEA+NAEB; i++) std::cout<<*(sdet2+i) <<" ";
std::cout<<std::endl;
std::cout.flush();
}
assert(iq1>=0 && iq2>=0);
nq = ind[0]-iq1+ind[1]-iq2;
sg = nq%2==0?1.0:-1.0;
} else
sg=0.0;
return 2*cnt;
}
}

View File

@ -0,0 +1,23 @@
#ifndef QMCPLUSPLUS_AFQMC_WAVEFUNCTIONHELPER_H
#define QMCPLUSPLUS_AFQMC_WAVEFUNCTIONHELPER_H
#include<iostream>
#include<fstream>
#include<string>
#include<cstdlib>
#include<cassert>
#include<complex>
#include<map>
#include "AFQMC/config.0.h"
// Helper functions for slater determinant routines
namespace qmcplusplus
{
// Taken from the FCIQMC code. I'm sure there's a nicer way to do this using STL.
// Compares the determinants sdet1 and sdet2 (NAEA+NAEB entries each).
// Outputs: n (4 ints, differing orbitals or -1), sg (sign, 0 if not a single
// or double difference); work is used as scratch. Returns twice the number
// of entries of sdet1 not found in sdet2. Defined in WavefunctionHelper.cpp.
int cmpDets(int NAEA, int NAEB, int* n, double &sg, std::vector<IndexType>::iterator sdet1, std::vector<IndexType>::iterator sdet2, std::vector<IndexType>& work );
}
#endif

224
src/AFQMC/config.0.h Executable file
View File

@ -0,0 +1,224 @@
#ifndef AFQMC_CONFIG_0_H
#define AFQMC_CONFIG_0_H
#include <string>
#include <algorithm>
#include<cstdlib>
#include<ctype.h>
#include <vector>
#include <map>
#include <complex>
#include <tuple>
#include <fstream>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
//#include"../FCIQMC/config.0.h"
namespace qmcplusplus
{
// template<typename spT> using ShmemAllocator = boost::interprocess::allocator<spT, boost::interprocess::managed_shared_memory::segment_manager>;
// template<typename spT> using SMVector = boost::interprocess::vector<spT, ShmemAllocator<spT>>;
// Basic scalar and index types used throughout AFQMC.
typedef uint32_t IndexType;
typedef uint32_t OrbitalType;
// typedef uint16_t OrbitalType;
typedef double RealType;
// ValueType follows the build: complex when QMC_COMPLEX is defined, real otherwise.
#if defined(QMC_COMPLEX)
typedef std::complex<RealType> ValueType;
#else
typedef RealType ValueType;
#endif
typedef std::complex<RealType> ComplexType;
// HAVE_CPP11 is defined unconditionally right here, so the tuple-based
// aliases below are always selected and the struct fallbacks in the #else
// branch are never compiled from this header.
#define HAVE_CPP11
#ifdef HAVE_CPP11
// snD<T>: n indices followed by a value of type T, stored as a std::tuple.
template<typename T> using s1D = std::tuple<IndexType,T>;
template<typename T> using s2D = std::tuple<IndexType,IndexType,T>;
template<typename T> using s3D = std::tuple<IndexType,IndexType,IndexType,T>;
template<typename T> using s4D = std::tuple<IndexType,IndexType,IndexType,IndexType,T>;
#else
template<typename T> struct s1D { T value; IndexType i1; };
template<typename T> struct s2D { T value; IndexType i1, i2; };
template<typename T> struct s3D { T value; IndexType i1, i2, i3; };
template<typename T> struct s4D { T value; IndexType i1, i2, i3, i4; };
template<typename T> struct s5D { T value; IndexType i1, i2, i3, i4, i5; };
template<typename T> struct s9D { T value; IndexType i1, i2, i3, i4, i5, i6, i7, i8, i9; };
#endif
template<typename T>
inline bool isComplex(const T& a)
{
return std::is_same<T,std::complex<RealType>>::value;
}
// Converts a scalar to ComplexType. Only the two specializations below are
// defined: RealType (imaginary part set to 0) and std::complex<RealType>
// (returned unchanged).
template<typename T>
inline ComplexType toComplex(const T& a);
template<>
inline ComplexType toComplex<RealType>(const RealType& a)
{
  ComplexType z(a,0.0);
  return z;
}
template<>
inline ComplexType toComplex<std::complex<RealType> >(const std::complex<RealType>& a)
{
  return a;
}
template<typename T>
inline void setImag(T& a, RealType b);
template<>
inline void setImag(RealType& a, RealType b)
{
}
template<>
inline void setImag(std::complex<RealType>& a, RealType b)
{
a.imag(b);
}
// Conjugation that also accepts non-complex scalars.
// Generic overload: returns a copy of its argument.
template<typename T>
inline T myconj(const T& a)
{
  T same(a);
  return same;
}
// std::complex overload: returns the complex conjugate.
template<typename T>
inline std::complex<T> myconj(const std::complex<T>& a)
{
  const std::complex<T> conjugated = std::conj(a);
  return conjugated;
}
// Squared magnitude that also accepts non-complex scalars.
// Generic overload: a*a converted to RealType.
template<typename T>
inline RealType mynorm(const T& a)
{
  const RealType squared = a*a;
  return squared;
}
// std::complex overload: std::norm(a).
template<typename T>
inline RealType mynorm(const std::complex<T> &a)
{
  const RealType squared = std::norm(a);
  return squared;
}
// Mixed int * std::complex<T> products: the integer operand is converted
// to T and the standard scalar-complex multiplication is used.
template<typename T>
inline std::complex<T> operator*(const int &lhs, const std::complex<T> &rhs)
{
  const T scale = static_cast<T>(lhs);
  return scale * rhs;
}
template<typename T>
inline std::complex<T> operator*(const std::complex<T> &lhs, const int &rhs)
{
  const T scale = static_cast<T>(rhs);
  return lhs * scale;
}
// Comparator for descending integer order: true when i must come before j.
inline bool sortDecreasing (int i,int j)
{
  return j < i;
}
// Strict-weak-ordering functor for s1D/s2D/s4D entries holding RealType or
// std::complex<RealType> values. Entries are ordered lexicographically by
// their index fields only; the stored value never takes part.
struct _mySort_snD_ {
  // ---- real-valued entries ----
  bool operator() (const s1D<RealType>& lhs, const s1D<RealType>& rhs)
  {
    return std::get<0>(lhs) < std::get<0>(rhs);
  }
  bool operator() (const s2D<RealType>& lhs, const s2D<RealType>& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs))
         < std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs));
  }
  bool operator() (const s4D<RealType>& lhs, const s4D<RealType>& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs),std::get<2>(lhs),std::get<3>(lhs))
         < std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs),std::get<2>(rhs),std::get<3>(rhs));
  }
  // ---- complex-valued entries ----
  bool operator() (const s1D<std::complex<RealType> >& lhs, const s1D<std::complex<RealType> >& rhs)
  {
    return std::get<0>(lhs) < std::get<0>(rhs);
  }
  bool operator() (const s2D<std::complex<RealType> >& lhs, const s2D<std::complex<RealType> >& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs))
         < std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs));
  }
  bool operator() (const s4D<std::complex<RealType> >& lhs, const s4D<std::complex<RealType> >& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs),std::get<2>(lhs),std::get<3>(lhs))
         < std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs),std::get<2>(rhs),std::get<3>(rhs));
  }
};
// Equivalence functor matching _mySort_snD_: two entries are equivalent when
// all of their index fields agree; the stored value never takes part.
struct _myEqv_snD_ {
  // ---- real-valued entries ----
  bool operator() (const s1D<RealType>& lhs, const s1D<RealType>& rhs)
  {
    return std::get<0>(lhs) == std::get<0>(rhs);
  }
  bool operator() (const s2D<RealType>& lhs, const s2D<RealType>& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs))
        == std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs));
  }
  bool operator() (const s4D<RealType>& lhs, const s4D<RealType>& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs),std::get<2>(lhs),std::get<3>(lhs))
        == std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs),std::get<2>(rhs),std::get<3>(rhs));
  }
  // ---- complex-valued entries ----
  bool operator() (const s1D<std::complex<RealType> >& lhs, const s1D<std::complex<RealType> >& rhs)
  {
    return std::get<0>(lhs) == std::get<0>(rhs);
  }
  bool operator() (const s2D<std::complex<RealType> >& lhs, const s2D<std::complex<RealType> >& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs))
        == std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs));
  }
  bool operator() (const s4D<std::complex<RealType> >& lhs, const s4D<std::complex<RealType> >& rhs)
  {
    return std::forward_as_tuple(std::get<0>(lhs),std::get<1>(lhs),std::get<2>(lhs),std::get<3>(lhs))
        == std::forward_as_tuple(std::get<0>(rhs),std::get<1>(rhs),std::get<2>(rhs),std::get<3>(rhs));
  }
};
}
namespace std {
// Lexicographic "less than" for complex numbers: compare real parts first and
// fall back to the imaginary parts when the real parts do not differ.
// NOTE(review): adding overloads to namespace std is formally undefined
// behavior in standard C++; kept here because existing code relies on this
// operator being found for std::complex operands.
template<typename T>
inline bool operator<(const std::complex<T> &lhs, const std::complex<T> &rhs)
{
  return (lhs.real() != rhs.real()) ? (lhs.real() < rhs.real())
                                    : (lhs.imag() < rhs.imag());
}
}
#endif

176
src/AFQMC/config.h Executable file
View File

@ -0,0 +1,176 @@
#ifndef AFQMC_CONFIG_H
#define AFQMC_CONFIG_H
#include <string>
#include <algorithm>
#include<cstdlib>
#include<ctype.h>
#include <vector>
#include <map>
#include <complex>
#include <tuple>
#include <fstream>
#include "Configuration.h"
#include "OhmmsData/AttributeSet.h"
#include "OhmmsData/ParameterSet.h"
#include <OhmmsPETE/OhmmsMatrix.h>
#include <OhmmsPETE/Tensor.h>
#include <OhmmsPETE/OhmmsVector.h>
#include <OhmmsPETE/TinyVector.h>
#include "AFQMC/config.0.h"
#include "AFQMC/Matrix/SparseMatrix.h"
#include "AFQMC/Matrix/SMSparseMatrix.h"
#include "AFQMC/Matrix/SMDenseVector.h"
#include "AFQMC/Utilities/myTimer.h"
extern myTimer Timer;
#define AFQMC_DEBUG 3
#define AFQMC_TIMER
// careful here that RealType is consistent with this!!!
#define MKL_Complex8 std::complex<float>
#define MKL_Complex16 std::complex<double>
namespace qmcplusplus
{
// Container aliases, one family per container template and one member per
// scalar type (IndexType, RealType, ValueType, ComplexType).
// Vector<T>
typedef Vector<IndexType> IndexVector;
typedef Vector<RealType> RealVector;
typedef Vector<ValueType> ValueVector;
typedef Vector<ComplexType> ComplexVector;
// SMDenseVector<T> (declared in AFQMC/Matrix/SMDenseVector.h)
typedef SMDenseVector<IndexType> IndexSMVector;
typedef SMDenseVector<RealType> RealSMVector;
typedef SMDenseVector<ValueType> ValueSMVector;
typedef SMDenseVector<ComplexType> ComplexSMVector;
// Matrix<T>
typedef Matrix<IndexType> IndexMatrix;
typedef Matrix<RealType> RealMatrix;
typedef Matrix<ValueType> ValueMatrix;
typedef Matrix<ComplexType> ComplexMatrix;
// SparseMatrix<T> (declared in AFQMC/Matrix/SparseMatrix.h)
typedef SparseMatrix<IndexType> IndexSpMat;
typedef SparseMatrix<RealType> RealSpMat;
typedef SparseMatrix<ValueType> ValueSpMat;
typedef SparseMatrix<ComplexType> ComplexSpMat;
// SMSparseMatrix<T> (declared in AFQMC/Matrix/SMSparseMatrix.h)
typedef SMSparseMatrix<IndexType> IndexSMSpMat;
typedef SMSparseMatrix<RealType> RealSMSpMat;
typedef SMSparseMatrix<ValueType> ValueSMSpMat;
typedef SMSparseMatrix<ComplexType> ComplexSMSpMat;
/** Orbital/electron bookkeeping for an AFQMC calculation.
 *
 *  Holds the orbital and electron counts plus the spin-restriction flag.
 *  The values are read from an XML node by parse(), can be copied from
 *  another instance with copyInfo(), sanity-checked with
 *  checkAFQMCInfoState() and printed with printAFQMCInfoState().
 */
struct AFQMCInfo
{
public:

  // Default constructor. Counts start at sentinel values that
  // checkAFQMCInfoState() rejects until parse() has filled them in.
  // The initializers are listed in member-declaration order, which is the
  // order in which they actually run. spinRestricted used to be initialized
  // from -1, which converts to true; the same value is now spelled explicitly.
  AFQMCInfo():name(""),NMO_FULL(-1),NMO(-1),NAEA(-1),NAEB(-1),NCA(0),NCB(0),NETOT(-1),MS2(-99)
             ,ISYM(-1),spinRestricted(true)
  {}

  // destructor
  ~AFQMCInfo() {}

  // name
  std::string name;

  // number of orbitals
  int NMO_FULL;

  // number of active orbitals (set by parse() to NMO_FULL-NCA)
  int NMO;

  // number of active electrons alpha/beta
  int NAEA, NAEB;

  // number of core electrons alpha/beta
  int NCA,NCB;

  // total number of electrons
  int NETOT;

  // ms2
  int MS2;

  // isym
  int ISYM;

  // if true then RHF calculation, otherwise it is UHF
  bool spinRestricted;

  // copies values from object
  void copyInfo(const AFQMCInfo& a) {
    name=a.name;
    NMO_FULL=a.NMO_FULL;
    NMO=a.NMO;
    NAEA=a.NAEA;
    NAEB=a.NAEB;
    NCA=a.NCA;
    NCB=a.NCB;
    NETOT=a.NETOT;
    MS2=a.MS2;
    ISYM=a.ISYM;
    spinRestricted=a.spinRestricted;
  }

  // Returns true when the counts are usable: at least one orbital, at least
  // one active electron of each spin and non-negative core counts.
  // no fully spin polarized yet, not sure what it will break
  // (the NETOT and MS2 consistency checks are intentionally left disabled:
  //  NETOT!= NCA+NCB+NAEA+NAEB, MS2<0)
  bool checkAFQMCInfoState() const {
    if(NMO_FULL<1 || NAEA<1 || NAEB<1 || NCA<0 || NCB<0 )
      return false;
    return true;
  }

  // Writes the current values to out, one "label: value" pair per line.
  void printAFQMCInfoState(std::ostream& out) const {
    out<<"AFQMC info: \n"
       <<"name: " <<name <<"\n"
       <<"NMO_FULL: " <<NMO_FULL <<"\n"
       <<"NAEA: " <<NAEA <<"\n"
       <<"NAEB: " <<NAEB <<"\n"
       <<"NCA: " <<NCA <<"\n"
       <<"NCB: " <<NCB <<"\n"
       <<"NETOT: " <<NETOT <<"\n"
       <<"MS2: " <<MS2 <<"\n"
       <<"spinRestricted: " <<spinRestricted <<std::endl;
  }

  // Reads the "name" attribute and the integer/string parameters from cur.
  // Returns false only when cur is NULL.
  bool parse(xmlNodePtr cur)
  {
    if(cur == NULL)
      return false;

    OhmmsAttributeSet oAttrib;
    oAttrib.add(name,"name");
    oAttrib.put(cur);

    std::string sR("yes");
    ParameterSet m_param;
    m_param.add(NMO_FULL,"NMO_FULL","int");
    // NOTE(review): "NMO" is bound to NMO_FULL as well, so it looks like an
    // input alias for the full orbital count; NMO itself is derived below.
    // Confirm this is intended.
    m_param.add(NMO_FULL,"NMO","int");
    m_param.add(NAEA,"NAEA","int");
    m_param.add(NAEB,"NAEB","int");
    m_param.add(NCA,"NCA","int");
    m_param.add(NCB,"NCB","int");
    m_param.add(NETOT,"NETOT","int");
    m_param.add(MS2,"MS2","int");
    m_param.add(sR,"spinRestricted","string");
    m_param.put(cur);

    // Lower-case the flag in place. Each character goes through
    // unsigned char first because tolower() is undefined for negative values.
    for(std::string::size_type i=0; i<sR.size(); ++i)
      sR[i] = static_cast<char>(tolower(static_cast<unsigned char>(sR[i])));
    spinRestricted = (sR == "yes" || sR == "true");

    NMO = NMO_FULL-NCA;

    return true;
  }

};
}
#endif

18
src/AFQMC/todo.dat Normal file
View File

@ -0,0 +1,18 @@
Parallelization / Distributed routines:
Finish:
1. Wave-function parallelization routines. Partition hamiltonian and write versions for MultiSD
2.
Ideas to implement:
- Cholesky vectors are symmetric when real (and hermitian when complex???). Think about how to use this in your favor.
- Postprocessing driver. This will look over walkers saved in configuration files and calculate properties.
- Test and use idea where the projection is done with a large cutoff, but the energy is corrected by postprocessing configurations with a small cutoff. See how well it works.
- Combine load balancing and population control into a single routine.
- Implement version of energy and propagation routine where Spvn and Muv are stored as single precision real matrices. For chemical hamiltonians Spvn is purely complex and Muv is real, so no need to store these as complex. Apply complex factors as needed later. This should save a factor of 4 in memory.
- Implement MP2 NO or selected CI rotation of the basis.
- Finish GHF walkers
- implement a "meanfield_substraction" wave-function tag that can be used to calculate vMF in Propagator.
test the dependence of the variance and bias on the quality of vMF for a fixed importancesampling twf
- In the hybrid method, there is a clear differentiation between the twf used for importance sampling and the twf used for force bias. Implement the option to use different ones. For example, keep importancesampling for that purpose and add a force_bias type to the wfnhandler. Test the dependence of the variance, execution speed and bias on the choice of force-bias wfn and importance sampling. The point is that for large multideterminant twfs in the hybrid mode, the most expensive part of the calculation is the evaluation of the Green's function, which depends only on the force-bias twf. You can use a medium quality wavefunction for force bias and a very high quality wavefunction for importance sampling. Is this useful?

View File

@ -199,6 +199,15 @@ else()
SUBDIRS(QMCWaveFunctions)
SUBDIRS(QMCHamiltonians)
SUBDIRS(QMCDrivers)
# AFQMC sources are only added to the build when BUILD_AFQMC is enabled.
IF(BUILD_AFQMC)
SUBDIRS(AFQMC)
ENDIF(BUILD_AFQMC)
# FCIQMC sources are only added to the build when BUILD_FCIQMC is enabled.
IF(BUILD_FCIQMC)
SUBDIRS(FCIQMC)
ENDIF(BUILD_FCIQMC)
SUBDIRS(QMCApp)
#IF(BUILD_QMCTOOLS)

View File

@ -69,7 +69,7 @@ template<typename T> inline void Communicate::gather(T& sb, T& rb, int dest)
// Generic fallback: aborts because no allgather specialization exists for T.
// The stale abort that named gatherv (and made the correct message
// unreachable) has been dropped.
template<typename T>
inline void Communicate::allgather(T& sb, T& rb, int count)
{
  APP_ABORT("Need specialization for allgather(T&, T&, int)");
}
template<typename T, typename IT>
@ -117,6 +117,93 @@ Communicate::isend(int dest, int tag, T*, int n)
return MPI_REQUEST_NULL;
}
// Generic fallbacks for the Communicate operations added with explicit
// communicator / raw-pointer arguments. Each one aborts with a message naming
// the overload that still needs a type-specific specialization. Messages that
// named the wrong signature (or contained "::::") have been corrected.

template<typename T> inline void Communicate::allreduce(T&, mpi_comm_type comm )
{
  APP_ABORT("Need specialization for allreduce(T&,comm)");
}

template<typename T> inline void
Communicate::gsum(T&, mpi_comm_type comm)
{
  APP_ABORT("Need specialization for Communicate::gsum(T&, mpi_comm_type)");
}

template<typename T> inline void
Communicate::bcast(T&, mpi_comm_type comm )
{
  APP_ABORT("Need specialization for bcast(T&,comm)");
}

template<typename T> inline void
Communicate::bcast(T* restrict ,int n, mpi_comm_type comm)
{
  APP_ABORT("Need specialization for bcast(T* restrict ,int n, mpi_comm_type comm)");
}

template<typename T> inline void
Communicate::bcast(T* restrict ,int n, int orig, mpi_comm_type comm)
{
  APP_ABORT("Need specialization for bcast(T* restrict ,int n, int orig, mpi_comm_type comm)");
}

template<typename T> inline void
Communicate::send(T* restrict, int n, int dest, int tag, mpi_comm_type comm)
{
  APP_ABORT("Need specialization for send(T*, int, int, int, mpi_comm_type )");
}

template<typename T> inline void
Communicate::recv(T* restrict, int n, int dest, int tag, mpi_comm_type comm, MPI_Status*)
{
  APP_ABORT("Need specialization for recv(T*, int, int, int, mpi_comm_type, MPI_Status*)");
}

template<typename T>
inline void Communicate::allgather(T& sb, T& rb, int count, mpi_comm_type comm)
{
  APP_ABORT("Need specialization for allgather(T&, T&, int, comm)");
}

template<typename T>
inline void Communicate::allgather(T* sb, T* rb, int count)
{
  APP_ABORT("Need specialization for allgather(T*, T*, int)");
}

template<typename T, typename IT>
inline void Communicate::gatherv(T* sb, T* rb, int n, IT&, IT&, int dest)
{
  APP_ABORT("Need specialization for gatherv(T*, T*, int, IT&, IT&, int)");
}

template<typename T, typename IT>
inline void Communicate::gatherv(T* sb, T* rb, int n, IT&, IT&, int dest, MPI_Comm)
{
  APP_ABORT("Need specialization for gatherv(T*, T*, int, IT&, IT&, int,MPI_Comm)");
}

template<typename T, typename IT>
inline void Communicate::scatterv(T* sb, T* rb, int n, IT&, IT&, int source, MPI_Comm comm)
{
  APP_ABORT("Need specialization for scatterv(T*, T*, int n, IT&, IT&, int, MPI_Comm)");
}

template<typename T> inline void
Communicate::gsum(T&)
{
  APP_ABORT("Need specialization for Communicate::gsum(T&)");
}

template<typename T> inline void
Communicate::gmax(T&, mpi_comm_type comm)
{
  APP_ABORT("Need specialization for Communicate::gmax(T&, mpi_comm_type)");
}
template<>
inline void gsum(int& g, int gid)
{
@ -386,6 +473,13 @@ Communicate::bcast(uint32_t & g)
MPI_Bcast(&g,1,MPI_UNSIGNED,0,myMPI);
}
// Broadcasts the g.size() elements of g from rank 0 over myMPI, sent as
// MPI_UNSIGNED. The vector is not resized here, so every rank must already
// hold a vector of the same length.
template<>
inline void
Communicate::bcast(std::vector<uint32_t>& g)
{
MPI_Bcast(&(g[0]),g.size(),MPI_UNSIGNED,0,myMPI);
}
template<>
inline void
@ -496,7 +590,7 @@ Communicate::bcast(qmcplusplus::Vector<std::complex<double> >& g)
template<>
inline void
Communicate::bcast(qmcplusplus::Vector<complex<float> >& g)
Communicate::bcast(qmcplusplus::Vector<std::complex<float> >& g)
{
MPI_Bcast(&(g[0]),2*g.size(),MPI_FLOAT,0,myMPI);
}
@ -576,7 +670,7 @@ Communicate::bcast(Array<std::complex<double>,3> &g)
template<>
inline void
Communicate::bcast(Array<complex<float>,3> &g)
Communicate::bcast(Array<std::complex<float>,3> &g)
{
MPI_Bcast(g.data(), 2*g.size(), MPI_FLOAT, 0, myMPI);
}
@ -709,6 +803,21 @@ Communicate::irecv(int source, int tag, std::vector<double>& g)
return r;
}
// Gathers variable-length char buffers on rank `dest` over myMPI.
// Each rank contributes l.size() chars; counts/displ give the per-rank
// receive counts and offsets into g, as MPI_Gatherv expects.
template<>
inline void
Communicate::gatherv(std::vector<char>& l, std::vector<char>& g,
std::vector<int>& counts, std::vector<int>& displ, int dest)
{
#if defined(_CRAYMPI)
// On Cray MPI a barrier precedes messages shorter than cray_short_msg_size bytes.
const int cray_short_msg_size=128000;
if(l.size()*sizeof(char)<cray_short_msg_size)
this->barrier();
#endif
// NOTE(review): the MPI return code is stored but never checked.
int ierr = MPI_Gatherv(&l[0], l.size(), MPI_CHAR,
&g[0], &counts[0], &displ[0], MPI_CHAR, dest, myMPI);
}
template<>
inline void
Communicate::gatherv(std::vector<double>& l, std::vector<double>& g,
@ -764,6 +873,20 @@ Communicate::allgather(std::vector<char>& sb,
MPI_Allgather(&sb[0], count, MPI_CHAR, &rb[0], count, MPI_CHAR, myMPI);
}
// All-gathers `count` ints from every rank of myMPI: sb is the local send
// buffer, rb receives `count` ints per rank.
template<>
inline void
Communicate::allgather(std::vector<int>& sb,
std::vector<int>& rb, int count)
{
#if defined(_CRAYMPI)
// On Cray MPI a barrier precedes messages shorter than cray_short_msg_size bytes.
// The size test uses sb.size(), not count.
const int cray_short_msg_size=128000;
if(sb.size()*sizeof(int)<cray_short_msg_size)
this->barrier();
#endif
MPI_Allgather(&sb[0], count, MPI_INT, &rb[0], count, MPI_INT, myMPI);
}
template<>
inline void
@ -806,6 +929,32 @@ Communicate::gather(std::vector<double>& l, std::vector<double>& g, int dest)
&g[0], l.size(), MPI_DOUBLE, dest, myMPI);
}
// Gathers l.size() chars from every rank of myMPI into g on rank `dest`.
template<>
inline void
Communicate::gather(std::vector<char>& l, std::vector<char>& g, int dest)
{
#if defined(_CRAYMPI)
// On Cray MPI a barrier precedes messages shorter than cray_short_msg_size bytes.
const int cray_short_msg_size=128000;
if(l.size()*sizeof(char)<cray_short_msg_size)
this->barrier();
#endif
// NOTE(review): the MPI return code is stored but never checked.
int ierr = MPI_Gather(&l[0], l.size(), MPI_CHAR,
&g[0], l.size(), MPI_CHAR, dest, myMPI);
}
// Gathers l.size() ints from every rank of myMPI into g on rank `dest`.
template<>
inline void
Communicate::gather(std::vector<int>& l, std::vector<int>& g, int dest)
{
#if defined(_CRAYMPI)
// On Cray MPI a barrier precedes messages shorter than cray_short_msg_size bytes.
const int cray_short_msg_size=128000;
if(l.size()*sizeof(int)<cray_short_msg_size)
this->barrier();
#endif
// NOTE(review): the MPI return code is stored but never checked.
int ierr = MPI_Gather(&l[0], l.size(), MPI_INT,
&g[0], l.size(), MPI_INT, dest, myMPI);
}
template<>
inline void
Communicate::gatherv(PooledData<double>& l, PooledData<double>& g,
@ -833,6 +982,305 @@ Communicate::gather(PooledData<double>& l, PooledData<double>& g, int dest)
int ierr = MPI_Gather(l.data(), l.size(), MPI_DOUBLE,
g.data(), l.size(), MPI_DOUBLE, dest, myMPI);
}
// Element-wise global sum of an int vector over myMPI; every rank ends up
// with the summed values in g.
template<>
inline void
Communicate::gsum(std::vector<int>& g)
{
// Separate receive buffer; the 0.0 initializer is converted to int 0.
std::vector<int> gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),g.size(),MPI_INT,MPI_SUM,myMPI);
g = gt;
}
// Element-wise global sum of a double vector over myMPI; every rank ends up
// with the summed values in g.
template<>
inline void
Communicate::gsum(std::vector<double>& g)
{
std::vector<double> gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),g.size(),MPI_DOUBLE,MPI_SUM,myMPI);
g = gt;
}
template<>
inline void gsum(std::vector<complex<double> >& g, int gid)
{
std::vector<complex<double> > gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),2*g.size(),MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
g = gt;
}
// Element-wise global sum of a complex<double> vector over myMPI. Each
// complex element is reduced as two MPI_DOUBLE values, hence 2*g.size().
template<>
inline void
Communicate::gsum(std::vector<std::complex<double> >& g)
{
std::vector<std::complex<double> > gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),2*g.size(),MPI_DOUBLE,MPI_SUM,myMPI);
g = gt;
}
// Gathers variable-length char buffers on rank `dest` over myMPI.
// l points to the n chars contributed by this rank; counts/displ give the
// per-rank receive counts and offsets into g, as MPI_Gatherv expects.
template<>
inline void
Communicate::gatherv(char* l, char* g, int n,
                     std::vector<int>& counts, std::vector<int>& displ, int dest)
{
#if defined(_CRAYMPI)
  // On Cray MPI a barrier precedes messages shorter than cray_short_msg_size
  // bytes. The message length is n; l is a raw pointer and has no size().
  const int cray_short_msg_size=128000;
  if(n*static_cast<int>(sizeof(char))<cray_short_msg_size)
    this->barrier();
#endif
  MPI_Gatherv(l, n, MPI_CHAR,
              g, &counts[0], &displ[0], MPI_CHAR, dest, myMPI);
}
// All-gathers `count` chars from every rank of myMPI: sb is the local send
// buffer, rb receives `count` chars per rank.
template<>
inline void
Communicate::allgather(char* sb,
char* rb, int count)
{
#if defined(_CRAYMPI)
// On Cray MPI a barrier precedes messages shorter than cray_short_msg_size bytes.
const int cray_short_msg_size=128000;
if(count*sizeof(char)<cray_short_msg_size)
this->barrier();
#endif
MPI_Allgather(sb, count, MPI_CHAR, rb, count, MPI_CHAR, myMPI);
}
// Element-wise global sum of an int vector over the caller-supplied
// communicator; g holds the result on every rank of comm.
template<>
inline void
Communicate::gsum(std::vector<int>& g, mpi_comm_type comm)
{
// Separate receive buffer; the 0.0 initializer is converted to int 0.
std::vector<int> gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),g.size(),MPI_INT,MPI_SUM,comm);
g = gt;
}
// Element-wise global maximum of an int vector over the caller-supplied
// communicator; g holds the result on every rank of comm.
template<>
inline void
Communicate::gmax(std::vector<int>& g, mpi_comm_type comm)
{
std::vector<int> gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),g.size(),MPI_INT,MPI_MAX,comm);
g = gt;
}
// Element-wise sum-reduction of a double vector over the caller-supplied
// communicator; g holds the result on every rank of comm.
template<>
inline void
Communicate::allreduce(std::vector<double>& g, mpi_comm_type comm)
{
std::vector<double> gt(g.size(), 0.0);
MPI_Allreduce(&(g[0]),&(gt[0]),g.size(),MPI_DOUBLE,MPI_SUM,
comm);
g = gt;
}
// Broadcasts g.size() doubles from rank 0 of comm. The vector is not resized here.
template<>
inline void
Communicate::bcast(std::vector<double>& g, mpi_comm_type comm)
{
MPI_Bcast(&(g[0]),g.size(),MPI_DOUBLE,0,comm);
}
// Broadcasts g.size() floats from rank 0 of comm. The vector is not resized here.
template<>
inline void
Communicate::bcast(std::vector<float>& g, mpi_comm_type comm)
{
MPI_Bcast(&(g[0]),g.size(),MPI_FLOAT,0,comm);
}
// Broadcasts g.size() ints from rank 0 of comm. The vector is not resized here.
template<>
inline void
Communicate::bcast(std::vector<int>& g, mpi_comm_type comm)
{
MPI_Bcast(&(g[0]),g.size(),MPI_INT,0,comm);
}
// Raw-pointer broadcasts over a caller-supplied communicator.
// The overloads taking `rk` broadcast from rank rk of comm; the ones without
// it broadcast from rank 0 of comm.

// n complex<double> values from rank rk, sent as 2*n MPI_DOUBLE values.
template<>
inline void
Communicate::bcast(std::complex<double>* restrict x, int n, int rk, mpi_comm_type comm)
{
MPI_Bcast(x,2*n,MPI_DOUBLE,rk,comm);
}
// n doubles from rank rk.
template<>
inline void
Communicate::bcast(double* restrict x, int n, int rk, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_DOUBLE,rk,comm);
}
// n floats from rank rk.
template<>
inline void
Communicate::bcast(float* restrict x, int n, int rk, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_FLOAT,rk,comm);
}
// n ints from rank rk.
template<>
inline void
Communicate::bcast(int* restrict x, int n, int rk, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_INT,rk,comm);
}
// n chars from rank rk.
template<>
inline void
Communicate::bcast(char* restrict x, int n, int rk, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_CHAR,rk,comm);
}
// n doubles from rank 0.
template<>
inline void
Communicate::bcast(double* restrict x, int n, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_DOUBLE,0,comm);
}
// n floats from rank 0.
template<>
inline void
Communicate::bcast(float* restrict x, int n, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_FLOAT,0,comm);
}
// MMORALES: Is this portable??? uint32_t <-> MPI_UNSIGNED???
// FIX FIX FIX
// NOTE(review): MPI_UNSIGNED describes C `unsigned`, so this relies on
// `unsigned` being 32 bits wide on the target platform.
// n uint32_t values from rank 0.
template<>
inline void
Communicate::bcast(uint32_t* restrict x, int n, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_UNSIGNED,0,comm);
}
// n ints from rank 0.
template<>
inline void
Communicate::bcast(int* restrict x, int n, mpi_comm_type comm)
{
MPI_Bcast(x,n,MPI_INT,0,comm);
}
// Blocking point-to-point transfers over a caller-supplied communicator.
// In the recv overloads the parameter named `dest` is passed to MPI_Recv as
// the source rank.

// Sends n doubles to rank dest.
template<> inline void
Communicate::send(double* restrict x, int n, int dest, int tag, mpi_comm_type comm)
{
MPI_Send(x,n,MPI_DOUBLE,dest,tag,comm);
}
// Receives n doubles from rank `dest`; st receives the MPI status.
template<> inline void
Communicate::recv(double* restrict x, int n, int dest, int tag, mpi_comm_type comm, MPI_Status* st)
{
MPI_Recv(x,n,MPI_DOUBLE,dest,tag,comm,st);
}
// Sends n uint32_t values (as MPI_UNSIGNED) to rank dest.
template<> inline void
Communicate::send(uint32_t* restrict x, int n, int dest, int tag, mpi_comm_type comm)
{
MPI_Send(x,n,MPI_UNSIGNED,dest,tag,comm);
}
// Receives n uint32_t values (as MPI_UNSIGNED) from rank `dest`.
template<> inline void
Communicate::recv(uint32_t* restrict x, int n, int dest, int tag, mpi_comm_type comm, MPI_Status *st)
{
MPI_Recv(x,n,MPI_UNSIGNED,dest,tag,comm,st);
}
// Sends n ints to rank dest.
template<> inline void
Communicate::send(int* restrict x, int n, int dest, int tag, mpi_comm_type comm)
{
MPI_Send(x,n,MPI_INT,dest,tag,comm);
}
// Receives n ints from rank `dest`.
template<> inline void
Communicate::recv(int* restrict x, int n, int dest, int tag, mpi_comm_type comm, MPI_Status *st)
{
MPI_Recv(x,n,MPI_INT,dest,tag,comm,st);
}
// Sends n complex<double> values to rank dest, as 2*n MPI_DOUBLE values.
template<> inline void
Communicate::send(std::complex<double>* restrict x, int n, int dest, int tag, mpi_comm_type comm)
{
MPI_Send(x,2*n,MPI_DOUBLE,dest,tag,comm);
}
// Receives n complex<double> values from rank `dest`, as 2*n MPI_DOUBLE values.
template<> inline void
Communicate::recv(std::complex<double>* restrict x, int n, int dest, int tag, mpi_comm_type comm, MPI_Status* st)
{
MPI_Recv(x,2*n,MPI_DOUBLE,dest,tag,comm,st);
}
// Scatters variable-length char segments from rank `source` of comm.
// On the source rank, sb holds all segments and counts/displ give each
// rank's length and offset; every rank receives n chars into rb.
template<>
inline void
Communicate::scatterv(char* sb, char* rb, int n,
                      std::vector<int>& counts, std::vector<int>& displ, int source, MPI_Comm comm)
{
#if defined(_CRAYMPI)
  // On Cray MPI a barrier precedes messages shorter than cray_short_msg_size
  // bytes. The original test referenced an undeclared `l`; the local message
  // length is n. The barrier is issued on `comm`, the communicator the
  // scatter itself runs on, so only the participating ranks are synchronized.
  const int cray_short_msg_size=128000;
  if(n*static_cast<int>(sizeof(char))<cray_short_msg_size)
    MPI_Barrier(comm);
#endif
  MPI_Scatterv(sb, &counts[0], &displ[0], MPI_CHAR,
               rb, n, MPI_CHAR, source, comm);
}
// Scatters variable-length char segments from rank `source` over myMPI.
// On the source rank, sb holds all segments and counts/displ give each
// rank's length and offset; every rank receives rb.size() chars into rb.
template<>
inline void
Communicate::scatterv(std::vector<char>& sb, std::vector<char>& rb,
                      std::vector<int>& counts, std::vector<int>& displ, int source)
{
#if defined(_CRAYMPI)
  // On Cray MPI a barrier precedes messages shorter than cray_short_msg_size
  // bytes. The original test referenced an undeclared `l`; the local message
  // is the receive buffer rb.
  const std::size_t cray_short_msg_size=128000;
  if(rb.size()*sizeof(char)<cray_short_msg_size)
    this->barrier();
#endif
  MPI_Scatterv(&sb[0], &counts[0], &displ[0], MPI_CHAR,
               &rb[0], rb.size(), MPI_CHAR, source, myMPI);
}
// Gathers variable-length char buffers on rank `dest` of comm.
// l points to the n chars contributed by this rank; counts/displ give the
// per-rank receive counts and offsets into g, as MPI_Gatherv expects.
template<>
inline void
Communicate::gatherv(char* l, char* g, int n,
                     std::vector<int>& counts, std::vector<int>& displ, int dest, MPI_Comm comm)
{
#if defined(_CRAYMPI)
  // On Cray MPI a barrier precedes messages shorter than cray_short_msg_size
  // bytes. The message length is n; l is a raw pointer and has no size().
  // The barrier is issued on `comm`, the communicator the gather itself runs
  // on, so only the participating ranks are synchronized.
  const int cray_short_msg_size=128000;
  if(n*static_cast<int>(sizeof(char))<cray_short_msg_size)
    MPI_Barrier(comm);
#endif
  MPI_Gatherv(l, n, MPI_CHAR,
              g, &counts[0], &displ[0], MPI_CHAR, dest, comm);
}
// All-gathers `count` ints from every rank of comm: sb is the local send
// buffer, rb receives `count` ints per rank.
template<>
inline void
Communicate::allgather(std::vector<int>& sb,
                       std::vector<int>& rb, int count, mpi_comm_type comm)
{
#if defined(_CRAYMPI)
  // On Cray MPI a barrier precedes messages shorter than cray_short_msg_size
  // bytes. It is issued on `comm`, the communicator the collective itself
  // runs on. this->barrier() takes no communicator argument and presumably
  // acts on the object's own communicator, which could hang when comm covers
  // only a subset of ranks.
  const std::size_t cray_short_msg_size=128000;
  if(sb.size()*sizeof(int)<cray_short_msg_size)
    MPI_Barrier(comm);
#endif
  MPI_Allgather(&sb[0], count, MPI_INT, &rb[0], count, MPI_INT, comm);
}
template<>
inline void
Communicate::allreduce(qmcplusplus::Matrix<complex<double> >& g)
{
std::vector<complex<double> > gt(g.size());
std::copy(g.begin(),g.end(),gt.begin());
MPI_Allreduce(g.data(), &gt[0], 2*g.size(), MPI_DOUBLE, MPI_SUM,
myMPI);
std::copy(gt.begin(),gt.end(),g.data());
}
// Broadcasts a single complex<double> from rank 0 over myMPI, sent as two
// MPI_DOUBLE values.
template<>
inline void
Communicate::bcast(std::complex<double>& g)
{
MPI_Bcast(&g,2,MPI_DOUBLE,0,myMPI);
}
#endif
/***************************************************************************
* $RCSfile$ $Author: kesler $

View File

@ -74,6 +74,71 @@ template<typename T, typename IT>
inline void Communicate::scatterv(T& sb, T& rb, IT&, IT&, int source)
{
}
template<typename T>
void allreduce(T&,Communicate::mpi_comm_type comm)
{ }
template<typename T>
void bcast(T&,Communicate::mpi_comm_type)
{ }
template<typename T>
void bcast(T* restrict, int n,Communicate::mpi_comm_type comm)
{ }
template<typename T>
void bcast(T* restrict, int n, int orig, Communicate::mpi_comm_type comm)
{ }
template<typename T>
void send(T* restrict, int n, int dest, int tag, Communicate::mpi_comm_type comm)
{ }
#ifdef HAVE_MPI
template<typename T>
void recv(T* restrict, int n, int dest, int tag, Communicate::mpi_comm_type comm, MPI_Status*)
{ }
#endif
template<typename T, typename IT>
void gatherv(T* sb, T* rb, int n, IT& counts, IT& displ, int dest)
{ }
#ifdef HAVE_MPI
template<typename T, typename IT>
void gatherv(T* sb, T* rb, int n,IT& counts, IT& displ, int dest, MPI_Comm comm)
{ }
#endif
template<typename T>
void allgather(T& sb, T& rb, int count, Communicate::mpi_comm_type comm)
{ }
template<typename T>
void allgather(T* sb, T* rb, int count)
{ }
#ifdef HAVE_MPI
template<typename T, typename IT>
void scatterv(T* sb, T* rb, int n, IT& counts, IT& displ, int source, MPI_Comm)
{ }
#endif
template<typename T>
void gsum(T&)
{ }
#ifdef HAVE_MPI
template<typename T>
void gsum(T&,Communicate::mpi_comm_type comm)
{ }
#endif
template<typename T>
void gmax(T&,Communicate::mpi_comm_type comm)
{ }
#endif
/***************************************************************************

View File

@ -61,7 +61,8 @@ struct CommunicatorTraits
#include <string>
#include <vector>
#include <utility>
#include <unistd.h>
#include <cstring>
/**@class Communicate
* @ingroup Message
@ -170,6 +171,37 @@ public:
return myName;
}
// MMORALES: leaving this here temporarily, but it doesn't belong here.
// MMORALES: FIX FIX FIX
#ifdef HAVE_MPI
// Splits myMPI into "first rank on each host" and "everyone else".
//
// Every rank publishes its host name. A rank is the head of its node when no
// lower rank reported the same name. myMPI is then split with color 0 for
// heads and 10 for the rest (ordered by rank), and the resulting communicator
// is returned through MPI_COMM_HEAD_OF_NODES, so non-head ranks also receive
// a valid communicator containing the other non-heads.
//
// Returns true when this rank is the head of its node.
// NOTE(review): HOST_NAME_MAX is expected to come from <limits.h>; confirm it
// is available on every target platform.
inline bool head_nodes(MPI_Comm& MPI_COMM_HEAD_OF_NODES) {
  // One extra byte and zero-fill guarantee a terminated string even for a
  // maximum-length host name or a failed gethostname() call.
  const int name_len = HOST_NAME_MAX+1;
  char hostname[HOST_NAME_MAX+1] = {0};
  gethostname(hostname,name_len);
  hostname[HOST_NAME_MAX] = '\0';

  const int myrank=rank(), nprocs=size();
  // The vector owns the gather buffer. The previous new[] buffer was released
  // with scalar delete, which is undefined behaviour.
  std::vector<char> all_names(static_cast<std::size_t>(nprocs)*name_len);
  MPI_Allgather(hostname,name_len,MPI_CHAR,&all_names[0],name_len,MPI_CHAR,myMPI);

  bool head_of_node=true;
  for(int i=0; i<myrank; i++)
    if( strcmp(hostname,&all_names[0]+static_cast<std::size_t>(i)*name_len)==0 ) { head_of_node=false; break;}

  const int color = head_of_node?0:10;
  MPI_Comm_split(myMPI,color,myrank,&MPI_COMM_HEAD_OF_NODES);
  return head_of_node;
}
#endif
// MMORALES:
// right now there is no easy way to use Communicate
// for generic processor subgroups, so calling split on myMPI
// and managing the communicator directly
// THIS MUST BE FIXED!!!
#ifdef HAVE_MPI
// Splits myMPI and returns the new communicator through comm.
// `key` is passed to MPI_Comm_split as the color, so ranks calling with the
// same value end up in the same communicator; this rank's index in myMPI is
// used as the ordering key. The unused local nprocs has been removed.
inline void split_comm(int key, MPI_Comm& comm) {
  const int myrank=rank();
  MPI_Comm_split(myMPI,key,myrank,&comm);
}
#endif
template<typename T> void allreduce(T&);
template<typename T> void reduce(T&);
template<typename T> void reduce(T* restrict, T* restrict, int n);
@ -187,6 +219,31 @@ public:
template<typename T> request irecv(int source, int tag, T*, int n);
template<typename T> request isend(int dest, int tag, T*, int n);
// MMORALES: this is just a temporary fix for the communicator problem
// Adding needed routines with explicit communicator arguments
// until I fix the problem.
// Reductions and broadcasts on a caller-supplied communicator.
template<typename T> void allreduce(T&,mpi_comm_type comm);
template<typename T> void bcast(T&,mpi_comm_type);
// Raw-pointer broadcasts of n elements; `orig` selects the broadcasting rank.
template<typename T> void bcast(T* restrict, int n,mpi_comm_type comm);
template<typename T> void bcast(T* restrict, int n, int orig, mpi_comm_type comm);
// Point-to-point transfer of n elements on a caller-supplied communicator.
template<typename T> void send(T* restrict, int n, int dest, int tag, mpi_comm_type comm);
// Overloads that mention MPI_Status / MPI_Comm are only declared with MPI.
#ifdef HAVE_MPI
template<typename T> void recv(T* restrict, int n, int dest, int tag, mpi_comm_type comm, MPI_Status*);
#endif
// Raw-pointer gather/scatter with per-rank counts and displacements.
template<typename T, typename IT> void gatherv(T* sb, T* rb, int n, IT& counts, IT& displ, int dest=0);
#ifdef HAVE_MPI
template<typename T, typename IT> void gatherv(T* sb, T* rb, int n,IT& counts, IT& displ, int dest, MPI_Comm comm);
#endif
template<typename T> void allgather(T& sb, T& rb, int count, mpi_comm_type comm);
template<typename T> void allgather(T* sb, T* rb, int count);
#ifdef HAVE_MPI
template<typename T, typename IT> void scatterv(T* sb, T* rb, int n, IT& counts, IT& displ, int source, MPI_Comm);
#endif
// Global sum / maximum, on the object's communicator or on an explicit one.
template<typename T> void gsum(T&);
template<typename T> void gsum(T&,mpi_comm_type comm);
template<typename T> void gmax(T&,mpi_comm_type comm);
protected:
mpi_comm_type myMPI;

View File

@ -76,6 +76,13 @@
#define dhgeqz dhgeqz_
#define dtgexc dtgexc_
#define dtgevc dtgevc_
#define dsyevr dsyevr_
#define zheevr zheevr_
#define zhegvx zhegvx_
#define zgeqrf zgeqrf_
#define zungqr zungqr_
#endif
// Clang issues a warning if the C return type is std::complex<double>
@ -239,6 +246,22 @@ extern "C" {
void sggev(char *JOBVL, char *JOBVR, int *N, float *A, int *LDA, float *B, int *LDB,float *ALPHAR, float *ALPHAI,
float *BETA, float *VL, int *LDVL, float *VR, int *LDVR, float *WORK, int *LWORK, int *INFO );
// LAPACK DSYEVR: selected eigenvalues and, optionally, eigenvectors of a
// real symmetric matrix.
// NOTE(review): the Fortran arguments are declared as C++ references here,
// which assumes references are passed like pointers by the ABI.
void dsyevr (char &JOBZ, char &RANGE, char &UPLO, int &N, double *A, int &LDA, double &VL, double &VU, int &IL,
int &IU, double &ABSTOL, int &M, double *W, double* Z, int &LDZ, int* ISUPPZ, double *WORK,
int &LWORK, int* IWORK, int &LIWORK, int &INFO);
// LAPACK ZHEEVR: selected eigenvalues and, optionally, eigenvectors of a
// complex Hermitian matrix.
void zheevr (char &JOBZ, char &RANGE, char &UPLO, int &N, std::complex<double> *A, int &LDA, double &VL, double &VU,
int &IL, int &IU, double &ABSTOL, int &M, double *W, std::complex<double>* Z, int &LDZ, int* ISUPPZ,
std::complex<double> *WORK, int &LWORK, double* RWORK, int &LRWORK, int* IWORK, int &LIWORK, int &INFO);
// LAPACK ZHEGVX: selected eigenvalues and, optionally, eigenvectors of a
// complex generalized Hermitian-definite eigenproblem. The leading unnamed
// int is the ITYPE argument (problem type).
void zhegvx (int&, char &JOBZ, char &RANGE, char &UPLO, int &N, std::complex<double> *A, int &LDA, std::complex<double> *B,
int &LDB, double &VL, double &VU, int &IL, int &IU, double &ABSTOL, int &M, double *W, std::complex<double>* Z,
int &LDZ, std::complex<double> *WORK, int &LWORK, double* RWORK, int* IWORK, int* IFAIL, int &INFO);
// LAPACK ZGEQRF: QR factorization of a complex M-by-N matrix.
void zgeqrf( const int *M, const int *N, std::complex<double> *A, const int *LDA, std::complex<double> *TAU, std::complex<double> *WORK, const int *LWORK, int *INFO );
// LAPACK ZUNGQR: builds the matrix Q with orthonormal columns from the
// elementary reflectors returned by ZGEQRF.
void zungqr( const int *M, const int *N, const int *K, std::complex<double> *A, const int *LDA, std::complex<double> *TAU, std::complex<double> *WORK, const int *LWORK, int *INFO );
void dger(const int* m, const int* n, const double* alpha
, const double* x, const int* incx, const double* y, const int* incy
, double* a, const int* lda);

View File

@ -55,6 +55,14 @@ ENDIF()
TARGET_LINK_LIBRARIES(${p} qmc qmcdriver qmcham qmcwfs qmcbase qmcutil adios_config)
# Link the afqmc library only when BUILD_AFQMC is enabled.
# The keyword-less TARGET_LINK_LIBRARIES signature matches the other link
# lines for ${p}; the two signatures must not be mixed on one target.
IF(BUILD_AFQMC)
TARGET_LINK_LIBRARIES(${p} afqmc)
ENDIF(BUILD_AFQMC)
# Link the fciqmc library only when BUILD_FCIQMC is enabled.
IF(BUILD_FCIQMC)
TARGET_LINK_LIBRARIES(${p} fciqmc)
ENDIF(BUILD_FCIQMC)
FOREACH(l ${QMC_UTIL_LIBS})
TARGET_LINK_LIBRARIES(${p} ${l})
ENDFOREACH(l ${QMC_UTIL_LIBS})

View File

@ -52,6 +52,12 @@ extern "C" {
#include <adios_error.h>
}
#endif
#ifdef BUILD_AFQMC
#include "AFQMC/AFQMCFactory.h"
#endif
#ifdef BUILD_FCIQMC
#include "FCIQMC/App/SQCFactory.h"
#endif
#define STR_VAL(arg) #arg
#define GET_MACRO_VAL(arg) STR_VAL(arg)
@ -108,6 +114,72 @@ bool QMCMain::execute()
ERRORMSG("No valid input file exists! Aborting QMCMain::execute")
return false;
}
std::string simulationType = "realspaceQMC";
{ // mmorales: is this necessary??? Don't want to leave xmlNodes lying around unused
xmlNodePtr cur=XmlDocStack.top()->getRoot();
OhmmsAttributeSet simType;
simType.add (simulationType, "type");
simType.add (simulationType, "name");
simType.add (simulationType, "method");
simType.put(cur);
}
#ifdef BUILD_AFQMC
if(simulationType == "afqmc") {
app_log() << std::endl << "/*************************************************\n"
<< " ******** This is an AFQMC calculation ********\n"
<< " *************************************************" <<std::endl;
xmlNodePtr cur=XmlDocStack.top()->getRoot();
xmlXPathContextPtr m_context = XmlDocStack.top()->getXPathContext();
//initialize the random number generator
xmlNodePtr rptr = myRandomControl.initialize(m_context);
AFQMCFactory afqmc_fac(myComm,myRandomControl);
if(!afqmc_fac.parse(cur)) {
app_log()<<" Error in AFQMCFactory::parse() ." <<std::endl;
return false;
}
cur=XmlDocStack.top()->getRoot();
return afqmc_fac.execute(cur);
} else
#else
if(simulationType == "afqmc") {
app_error()<<" Executable not compiled with AFQMC. Recompile with BUILD_AFQMC set to 1." <<std::endl;
return false;
}
#endif
#ifdef BUILD_FCIQMC
if(simulationType == "fciqmc") {
app_log() << std::endl << "/*************************************************\n"
<< " ******** This is a FCIQMC calculation ********\n"
<< " *************************************************" <<std::endl;
xmlNodePtr cur=XmlDocStack.top()->getRoot();
xmlXPathContextPtr m_context = XmlDocStack.top()->getXPathContext();
//initialize the random number generator
xmlNodePtr rptr = myRandomControl.initialize(m_context);
SQCFactory fciqmc_fac(myComm,myRandomControl);
if(!fciqmc_fac.parse(cur)) {
app_log()<<" Error in SQCFactory::parse() ." <<std::endl;
return false;
}
cur=XmlDocStack.top()->getRoot();
return fciqmc_fac.execute(cur);
}
#else
if(simulationType == "fciqmc") {
app_error()<<" Executable not compiled with FCIQMC. Recompile with BUILD_FCIQMC set to 1." <<std::endl;
return false;
}
#endif
//validate the input file
bool success = validateXML();
if(!success)

View File

@ -210,6 +210,12 @@
/* Setting full precision for CUDA kernels */
#cmakedefine CUDA_PRECISION_FULL @CUDA_PRECISION_FULL@
/* For AFQMC compilation */
#cmakedefine BUILD_AFQMC @BUILD_AFQMC@
/* For FCIQMC compilation */
#cmakedefine BUILD_FCIQMC @BUILD_FCIQMC@
#cmakedefine DEBUG_PSIBUFFER_ON @DEBUG_PSIBUFFER_ON@
/* Disable trace manager and associated features */

View File

@ -89,7 +89,7 @@ bool hdf_archive::create(const std::string& fname, unsigned flags)
return file_id != is_closed;
}
bool hdf_archive::open(const std::string& fname,unsigned flags)
bool hdf_archive::open(const std::string& fname,unsigned flags,bool create_if_fail)
{
if(Mode[NOIO])
return true;
@ -97,6 +97,10 @@ bool hdf_archive::open(const std::string& fname,unsigned flags)
file_id = H5Fopen(fname.c_str(),flags,access_id);
// if(file_id==is_closed)
// file_id = H5Fcreate(fname.c_str(),flags,H5P_DEFAULT,access_id);
// mmorales: flags for open and create are different.
// not sure this did anything before, hopefully it doesn't break old code
if(file_id==is_closed && create_if_fail)
file_id = H5Fcreate(fname.c_str(),H5F_ACC_EXCL,H5P_DEFAULT,access_id);
return file_id != is_closed;
}

View File

@ -81,7 +81,7 @@ struct hdf_archive
* @param flags i/o mode
* @return file_id, if open is successful
*/
bool open(const std::string& fname,unsigned flags=H5F_ACC_RDWR);
bool open(const std::string& fname,unsigned flags=H5F_ACC_RDWR,bool create_if_fail=true);
///close all the open groups and file
void close();