Merge branch 'QMCPACK:develop' into develop

This commit is contained in:
Jakub Kurzak 2021-05-27 13:28:05 -04:00
commit 1da848f377
38 changed files with 1743 additions and 118 deletions

View File

@ -227,6 +227,7 @@ CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
# MPIP_PROFILE profile mpi performance
######################################################################
OPTION(BUILD_UNIT_TESTS "Build unit tests" ON)
OPTION(BUILD_MICRO_BENCHMARKS "Build micro benchmarks" ON)
OPTION(BUILD_LMYENGINE_INTERFACE "Build LMY engine" ON)
IF (QMC_CUDA AND BUILD_LMYENGINE_INTERFACE)
MESSAGE(STATUS "LMY engine is not compatible with CUDA build! Disabling LMY engine")

View File

@ -8,6 +8,7 @@
// Copy and modify the ComplexApprox class to handle complex numbers for log(complex)
namespace Catch {
namespace Detail {
class LogComplexApprox
{
std::complex<double> m_value;
@ -79,20 +80,21 @@ public:
return os;
}
};
}
template<>
struct StringMaker<LogComplexApprox> {
static std::string convert(LogComplexApprox const &value);
struct StringMaker<Catch::Detail::LogComplexApprox> {
static std::string convert(Catch::Detail::LogComplexApprox const &value);
};
#ifdef CATCH_IMPL
std::string StringMaker<LogComplexApprox>::convert(LogComplexApprox const& value)
std::string StringMaker<Catch::Detail::LogComplexApprox>::convert(Catch::Detail::LogComplexApprox const& value)
{
return value.toString();
}
#endif
}
using Catch::LogComplexApprox;
using Catch::Detail::LogComplexApprox;
#endif

View File

@ -45,7 +45,7 @@ CSEnergyEstimator::CSEnergyEstimator(QMCHamiltonian& h, int hcopy)
scalars_saved.resize(scalars.size());
}
ScalarEstimatorBase* CSEnergyEstimator::clone() { return new CSEnergyEstimator(*this); }
CSEnergyEstimator* CSEnergyEstimator::clone() { return new CSEnergyEstimator(*this); }
/** add the local energy, variance and all the Hamiltonian components to the scalar record container
*@param record storage of scalar records (name,value)

View File

@ -82,7 +82,7 @@ struct CSEnergyEstimator : public ScalarEstimatorBase
*/
void add2Record(RecordNamedProperty<RealType>& record) override;
void registerObservables(std::vector<ObservableHelper>& h5dec, hid_t gid) override;
ScalarEstimatorBase* clone() override;
CSEnergyEstimator* clone() override;
void evaluateDiff();
};

View File

@ -25,7 +25,7 @@ LocalEnergyEstimator::LocalEnergyEstimator(QMCHamiltonian& h, bool use_hdf5) : U
scalars_saved.resize(SizeOfHamiltonians + LE_MAX);
}
ScalarEstimatorBase* LocalEnergyEstimator::clone() { return new LocalEnergyEstimator(*this); }
LocalEnergyEstimator* LocalEnergyEstimator::clone() { return new LocalEnergyEstimator(*this); }
void LocalEnergyEstimator::registerObservables(std::vector<ObservableHelper>& h5desc, hid_t gid)
{

View File

@ -75,7 +75,7 @@ public:
}
void add2Record(RecordListType& record) override;
void registerObservables(std::vector<ObservableHelper>& h5desc, hid_t gid) override;
ScalarEstimatorBase* clone() override;
LocalEnergyEstimator* clone() override;
/*@}*/
inline void accumulate(const RefVector<MCPWalker>& walkers) override

View File

@ -65,7 +65,7 @@ struct LocalEnergyOnlyEstimator : public ScalarEstimatorBase
clear();
}
ScalarEstimatorBase* clone() override { return new LocalEnergyOnlyEstimator(); }
LocalEnergyOnlyEstimator* clone() override { return new LocalEnergyOnlyEstimator(); }
};
} // namespace qmcplusplus
#endif

View File

@ -25,7 +25,7 @@ RMCLocalEnergyEstimator::RMCLocalEnergyEstimator(QMCHamiltonian& h, int nobs) :
scalars_saved.resize(2 * SizeOfHamiltonians + RMCSpecificTerms);
}
ScalarEstimatorBase* RMCLocalEnergyEstimator::clone() { return new RMCLocalEnergyEstimator(*this); }
RMCLocalEnergyEstimator* RMCLocalEnergyEstimator::clone() { return new RMCLocalEnergyEstimator(*this); }
/** add the local energy, variance and all the Hamiltonian components to the scalar record container
* @param record storage of scalar records (name,value)

View File

@ -143,7 +143,7 @@ public:
void add2Record(RecordListType& record) override;
void registerObservables(std::vector<ObservableHelper>& h5dec, hid_t gid) override {}
ScalarEstimatorBase* clone() override;
RMCLocalEnergyEstimator* clone() override;
/*@}*/
};
} // namespace qmcplusplus

View File

@ -73,7 +73,7 @@ std::vector<int> SpinDensityNew::getSpeciesSize(SpeciesSet& species)
size_t SpinDensityNew::getFullDataSize() { return species_.size() * derived_parameters_.npoints; }
OperatorEstBase* SpinDensityNew::clone()
SpinDensityNew* SpinDensityNew::clone()
{
std::cout << "SpinDensity clone called\n";
return new SpinDensityNew(*this);

View File

@ -77,7 +77,7 @@ public:
/** standard interface
*/
OperatorEstBase* clone() override;
SpinDensityNew* clone() override;
/** accumulate 1 or more walkers of SpinDensity samples
*/

View File

@ -25,7 +25,7 @@ class FakeEstimator : public ScalarEstimatorBase
void registerObservables(std::vector<ObservableHelper>& h5dec, hid_t gid) override {}
ScalarEstimatorBase* clone() override { return new FakeEstimator; }
FakeEstimator* clone() override { return new FakeEstimator; }
};
} // namespace qmcplusplus

View File

@ -34,8 +34,8 @@ public:
void registerOperatorEstimator(hid_t gid) override {}
void startBlock(int nsteps) override {}
OperatorEstBase* clone() override { return new FakeOperatorEstimator(*this); }
FakeOperatorEstimator* clone() override { return new FakeOperatorEstimator(*this); }
void set_walker_weights(QMCT::RealType weight) { walkers_weight_ = weight; }
};

View File

@ -48,7 +48,7 @@ TEST_CASE("LocalEnergy", "[estimators]")
QMCHamiltonian H;
LocalEnergyEstimator le_est(H, false);
std::unique_ptr<LocalEnergyEstimator> le_est2{dynamic_cast<LocalEnergyEstimator*>(le_est.clone())};
std::unique_ptr<LocalEnergyEstimator> le_est2{le_est.clone()};
REQUIRE(le_est2 != nullptr);
REQUIRE(le_est2.get() != &le_est);

View File

@ -2,9 +2,10 @@
#// This file is distributed under the University of Illinois/NCSA Open Source License.
#// See LICENSE file in top directory for details.
#//
#// Copyright (c) 2020 QMCPACK developers.
#// Copyright (c) 2021 QMCPACK developers.
#//
#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
#// Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
#//
#// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
#//////////////////////////////////////////////////////////////////////////////////////
@ -25,7 +26,28 @@ IF(HAVE_MPI)
SET_PROPERTY(TARGET message APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${qmcpack_SOURCE_DIR}/external_codes/mpi_wrapper")
ENDIF()
ADD_LIBRARY(catch_main catch_main.cpp)
TARGET_INCLUDE_DIRECTORIES(catch_main PUBLIC "${PROJECT_SOURCE_DIR}/external_codes/catch")
# This plus using the definition of the catch_benchmark_main as INTERFACE target
# would allow just building catch main once for benchmarking and mpi enabled testing
# But it causes a segv on power9 with clang12 and on alcf-gnu-mkl18
# TARGET_COMPILE_DEFINITIONS(catch_main PRIVATE "CATCH_CONFIG_ENABLE_BENCHMARKING")
TARGET_LINK_LIBRARIES(catch_main message)
IF(HAVE_MPI)
SET_PROPERTY(TARGET catch_main APPEND PROPERTY COMPILE_DEFINITIONS "CATCH_MAIN_HAVE_MPI")
ENDIF()
add_library(catch_main_no_mpi catch_main.cpp)
TARGET_INCLUDE_DIRECTORIES(catch_main_no_mpi PUBLIC "${PROJECT_SOURCE_DIR}/external_codes/catch")
# ADD_LIBRARY(catch_benchmark_main INTERFACE)
# SET_TARGET_PROPERTIES(catch_benchmark_main PROPERTIES INTERFACE_COMPILE_DEFINITIONS "CATCH_CONFIG_ENABLE_BENCHMARKING")
# TARGET_LINK_LIBRARIES(catch_benchmark_main INTERFACE catch_main)
ADD_LIBRARY(catch_benchmark_main catch_main.cpp)
TARGET_COMPILE_DEFINITIONS(catch_benchmark_main PUBLIC "CATCH_CONFIG_ENABLE_BENCHMARKING")
TARGET_LINK_LIBRARIES(catch_benchmark_main message)
IF(HAVE_MPI)
SET_PROPERTY(TARGET catch_benchmark_main APPEND PROPERTY COMPILE_DEFINITIONS "CATCH_MAIN_HAVE_MPI")
ENDIF()
TARGET_INCLUDE_DIRECTORIES(catch_benchmark_main PUBLIC "${PROJECT_SOURCE_DIR}/external_codes/catch")

View File

@ -13,7 +13,9 @@
#define CATCH_CONFIG_RUNNER
#include "catch.hpp"
#ifdef CATCH_MAIN_HAVE_MPI
#include "Message/Communicate.h"
#endif
// Replacement unit test main function to ensure that MPI is finalized once
// (and only once) at the end of the unit test.
@ -23,7 +25,7 @@ std::string UTEST_HAMIL, UTEST_WFN;
int main(int argc, char* argv[])
{
#ifdef HAVE_MPI
#ifdef CATCH_MAIN_HAVE_MPI
mpi3::environment env(argc, argv);
OHMMS::Controller->initialize(env);
#endif
@ -38,7 +40,9 @@ int main(int argc, char* argv[])
int parser_err = session.applyCommandLine(argc, argv);
// Run the tests.
int result = session.run(argc, argv);
#ifdef CATCH_MAIN_HAVE_MPI
OHMMS::Controller->finalize();
#endif
if (parser_err != 0)
{
return parser_err;

View File

@ -2,7 +2,7 @@
#// This file is distributed under the University of Illinois/NCSA Open Source License.
#// See LICENSE file in top directory for details.
#//
#// Copyright (c) 2020 QMCPACK developers.
#// Copyright (c) 2021 QMCPACK developers.
#//
#// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory
#//

View File

@ -2,7 +2,7 @@
#// This file is distributed under the University of Illinois/NCSA Open Source License.
#// See LICENSE file in top directory for details.
#//
#// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers.
#// Copyright (c) 2021 Jeongnim Kim and QMCPACK developers.
#//
#// File developed by: Bryan Clark, bclark@Princeton.edu, Princeton University
#// Ken Esler, kpesler@gmail.com, University of Illinois at Urbana-Champaign
@ -14,6 +14,7 @@
#// Jaron T. Krogel, krogeljt@ornl.gov, Oak Ridge National Laboratory
#// Mark Dewing, markdewing@gmail.com, University of Illinois at Urbana-Champaign
#// Luke Shulenburger, lshulen@sandia.gov, Sandia National Laboratories
#// Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
#//
#// File created by: Bryan Clark, bclark@Princeton.edu, Princeton University
#//////////////////////////////////////////////////////////////////////////////////////

View File

@ -89,17 +89,6 @@ inline int Xgetri(int n,
return status;
}
template<typename TIN, typename TOUT>
inline void TansposeSquare(const TIN* restrict in, TOUT* restrict out, size_t n, size_t lda)
{
#pragma omp simd
for (size_t i = 0; i < n; ++i)
for (size_t j = 0; j < n; ++j)
out[i * lda + j] = in[i + j * lda];
}
template<typename T, typename T_FP>
inline void computeLogDet(const T* restrict diag, int n, const int* restrict pivot, std::complex<T_FP>& logdet)
{
@ -130,7 +119,14 @@ class DiracMatrix
Lwork = -1;
T_FP tmp;
real_type_fp lw;
Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork);
int status = Xgetri(lda, invMat_ptr, lda, m_pivot.data(), &tmp, Lwork);
if (status != 0)
{
std::ostringstream msg;
msg << "Xgetri failed with error " << status << std::endl;
throw std::runtime_error(msg.str());
}
convert(tmp, lw);
Lwork = static_cast<int>(lw);
m_work.resize(Lwork);

View File

@ -189,6 +189,93 @@ void MultiDiracDeterminant::BuildDotProductsAndCalculateRatios(int ref,
#endif
}
void MultiDiracDeterminant::mw_evaluateDetsForPtclMove(const RefVectorWithLeader<MultiDiracDeterminant>& det_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat)
{
const int nw = det_list.size();
MultiDiracDeterminant& det_leader = det_list.getLeader();
det_leader.RatioTimer.start();
det_leader.UpdateMode = ORB_PBYP_RATIO;
/* FOR YE: THIS IS NOT compiling...
std::vector<RefVector<Vector<ValueVector_t>>> psiV_list;
for (size_t iw=0;iw<nw;iw++)
{
MultiDiracDeterminant& det= (det_list[iw]);
psiV_list.push_back(det.psiV);
}
det_leader.evalOrbTimer.start()
det_leader.Phi->mw_evaluateValue(P_list[iw], iat, psiV_list);
*/
if (det_leader.NumPtcls == 1)
for (size_t iw = 0; iw < nw; iw++)
{
MultiDiracDeterminant& det = (det_list[iw]);
det.UpdateMode = ORB_PBYP_RATIO;
det.Phi->evaluateValue(P_list[iw], iat, det.psiV);
const int WorkingIndex = iat - det.FirstIndex;
auto it(det.ciConfigList->begin());
auto last(det.ciConfigList->end());
ValueVector_t::iterator detval(det.new_detValues.begin()); ///Used detval instead of det.
while (it != last)
{
size_t orb = (it++)->occup[0];
*(detval++) = det.psiV[orb];
}
}
else
{
for (size_t iw = 0; iw < nw; iw++)
{
MultiDiracDeterminant& det = (det_list[iw]);
det.UpdateMode = ORB_PBYP_RATIO;
det.evalOrbTimer.start();
det.Phi->evaluateValue(P_list[iw], iat, det.psiV);
det.evalOrbTimer.stop();
const int WorkingIndex = iat - det.FirstIndex;
const auto& confgList = *det.ciConfigList;
///std::vector<int>::iterator it(confgList[ReferenceDeterminant].occup.begin());
auto it(confgList[det.ReferenceDeterminant].occup.begin());
// mmorales: the only reason this is here is because
// NonlocalECP do not necessarily call rejectMove after
// calling ratio(), and even if the move is rejected
// this matrix needs to be restored
// If we always restore after ratio, then this is not needed
// For efficiency reasons, I don't do this for ratioGrad or ratio(P,dG,dL)
det.ExtraStuffTimer.start();
det.psiMinv_temp = det.psiMinv;
for (size_t i = 0; i < det_leader.NumPtcls; i++)
det.psiV_temp[i] = det.psiV[*(it++)];
//template<typename MatA, typename VecB>
//inline typename MatA::value_type DetRatioByColumn(const MatA& Minv, const VecB& newv, int colchanged)
//{
// //use BLAS dot since the stride is not uniform
// // return simd::dot(Minv.cols(), Minv.data() + colchanged, Minv.cols(), newv.data(), 1);
// // }
// //
ValueType ratioRef = DetRatioByColumn(det.psiMinv_temp, det.psiV_temp, WorkingIndex);
// ValueType ratioRef =simd::dot( etRatioByColumn(det.psiMinv_temp, det.psiV_temp, WorkingIndex);
det.new_detValues[det.ReferenceDeterminant] = ratioRef * det.detValues[det.ReferenceDeterminant];
InverseUpdateByColumn(det.psiMinv_temp, det.psiV_temp, det.workV1, det.workV2, WorkingIndex, ratioRef);
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.psiV[i];
det.ExtraStuffTimer.stop();
det.BuildDotProductsAndCalculateRatios(det.ReferenceDeterminant, WorkingIndex, det.new_detValues,
det.psiMinv_temp, det.TpsiM, det.dotProducts, *det.detData,
*det.uniquePairs, *det.DetSigns);
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.psiM(WorkingIndex, i);
}
}
det_leader.RatioTimer.stop();
}
void MultiDiracDeterminant::evaluateDetsForPtclMove(const ParticleSet& P, int iat, int refPtcl)
{
UpdateMode = ORB_PBYP_RATIO;
@ -305,6 +392,174 @@ void MultiDiracDeterminant::evaluateDetsAndGradsForPtclMove(const ParticleSet& P
}
}
void MultiDiracDeterminant::evaluateDetsAndGradsForPtclMoveWithSpin(const ParticleSet& P, int iat)
{
assert(P.is_spinor_ == is_spinor_);
UpdateMode = ORB_PBYP_PARTIAL;
evalOrb1Timer.start();
Phi->evaluateVGL_spin(P, iat, psiV, dpsiV, d2psiV, dspin_psiV);
evalOrb1Timer.stop();
const int WorkingIndex = iat - FirstIndex;
assert(WorkingIndex >= 0 && WorkingIndex < LastIndex - FirstIndex);
if (NumPtcls == 1)
{
std::vector<ci_configuration2>::iterator it(ciConfigList->begin());
std::vector<ci_configuration2>::iterator last(ciConfigList->end());
ValueVector_t::iterator det(new_detValues.begin());
GradMatrix_t::iterator grad(new_grads.begin());
ValueMatrix_t::iterator spingrad(new_spingrads.begin());
while (it != last)
{
size_t orb = (it++)->occup[0];
*(det++) = psiV[orb];
*(grad++) = dpsiV[orb];
*(spingrad++) = dspin_psiV[orb];
}
}
else
{
ExtraStuffTimer.start();
//mmorales: check comment above
psiMinv_temp = psiMinv;
const auto& confgList = *ciConfigList;
//std::vector<int>::iterator it(confgList[ReferenceDeterminant].occup.begin());
auto it(confgList[ReferenceDeterminant].occup.begin());
GradType ratioGradRef;
ValueType ratioSpinGradRef = 0.0;
for (size_t i = 0; i < NumPtcls; i++)
{
psiV_temp[i] = psiV[*it];
ratioGradRef += psiMinv_temp(i, WorkingIndex) * dpsiV[*it];
ratioSpinGradRef += psiMinv_temp(i, WorkingIndex) * dspin_psiV[*it];
it++;
}
ValueType ratioRef = DetRatioByColumn(psiMinv_temp, psiV_temp, WorkingIndex);
new_grads(ReferenceDeterminant, WorkingIndex) = ratioGradRef * detValues[ReferenceDeterminant];
new_spingrads(ReferenceDeterminant, WorkingIndex) = ratioSpinGradRef * detValues[ReferenceDeterminant];
new_detValues[ReferenceDeterminant] = ratioRef * detValues[ReferenceDeterminant];
InverseUpdateByColumn(psiMinv_temp, psiV_temp, workV1, workV2, WorkingIndex, ratioRef);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = psiV[i];
ExtraStuffTimer.stop();
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, WorkingIndex, new_detValues, psiMinv_temp, TpsiM,
dotProducts, *detData, *uniquePairs, *DetSigns);
for (size_t idim = 0; idim < OHMMS_DIM; idim++)
{
ExtraStuffTimer.start();
//dpsiMinv = psiMinv_temp;
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
for (size_t i = 0; i < NumPtcls; i++)
psiV_temp[i] = dpsiV[*(it++)][idim];
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, WorkingIndex, ratioGradRef[idim]);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = dpsiV[i][idim];
ExtraStuffTimer.stop();
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, WorkingIndex, new_grads, dpsiMinv, TpsiM, dotProducts,
*detData, *uniquePairs, *DetSigns, idim);
}
//Now compute the spin gradient, same procedure as normal gradient components above
ExtraStuffTimer.start();
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
for (size_t i = 0; i < NumPtcls; i++)
psiV_temp[i] = dspin_psiV[*(it++)];
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, WorkingIndex, ratioSpinGradRef);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = dspin_psiV[i];
ExtraStuffTimer.stop();
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, WorkingIndex, new_spingrads, dpsiMinv, TpsiM, dotProducts,
*detData, *uniquePairs, *DetSigns);
// check comment above
for (int i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = psiM(WorkingIndex, i);
}
}
void MultiDiracDeterminant::mw_evaluateDetsAndGradsForPtclMove(
const RefVectorWithLeader<MultiDiracDeterminant>& det_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat)
{
const int nw = det_list.size();
MultiDiracDeterminant& det_leader = det_list.getLeader();
det_leader.UpdateMode = ORB_PBYP_RATIO;
//det_leader.evalOrb1Timer.start();
//mw_evaluateVGL(P_list[iw], iat, psiV_list, dpsiV_list, d2psiV_list);
//det_leader.evalOrb1Timer.stop();
if (det_leader.NumPtcls == 1)
for (size_t iw = 0; iw < nw; iw++)
{
MultiDiracDeterminant& det = (det_list[iw]);
det.evalOrb1Timer.start();
det.Phi->evaluateVGL(P_list[iw], iat, det.psiV, det.dpsiV, det.d2psiV);
det.evalOrb1Timer.stop();
const int WorkingIndex = iat - det.FirstIndex;
auto it(det.ciConfigList->begin());
auto last(det.ciConfigList->end());
ValueVector_t::iterator detval(det.new_detValues.begin());
GradMatrix_t::iterator gradval(det.new_grads.begin());
while (it != last)
{
size_t orb = (it++)->occup[0];
*(detval++) = det.psiV[orb];
*(gradval++) = det.dpsiV[orb];
}
}
else
{
for (size_t iw = 0; iw < nw; iw++)
{
MultiDiracDeterminant& det = (det_list[iw]);
det.evalOrb1Timer.start();
det.Phi->evaluateVGL(P_list[iw], iat, det.psiV, det.dpsiV, det.d2psiV);
det.evalOrb1Timer.stop();
const int WorkingIndex = iat - det.FirstIndex;
det.ExtraStuffTimer.start();
det.psiMinv_temp = det.psiMinv;
const auto& confgList = *det.ciConfigList;
auto it(confgList[det.ReferenceDeterminant].occup.begin());
GradType ratioGradRef;
for (size_t i = 0; i < det_leader.NumPtcls; i++)
{
det.psiV_temp[i] = det.psiV[*it];
ratioGradRef += det.psiMinv_temp(i, WorkingIndex) * det.dpsiV[*it];
it++;
}
ValueType ratioRef = DetRatioByColumn(det.psiMinv_temp, det.psiV_temp, WorkingIndex);
det.new_grads(det.ReferenceDeterminant, WorkingIndex) = ratioGradRef * det.detValues[det.ReferenceDeterminant];
det.new_detValues[det.ReferenceDeterminant] = ratioRef * det.detValues[det.ReferenceDeterminant];
InverseUpdateByColumn(det.psiMinv_temp, det.psiV_temp, det.workV1, det.workV2, WorkingIndex, ratioRef);
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.psiV[i];
det.ExtraStuffTimer.stop();
det.BuildDotProductsAndCalculateRatios(det.ReferenceDeterminant, WorkingIndex, det.new_detValues,
det.psiMinv_temp, det.TpsiM, det.dotProducts, *det.detData,
*det.uniquePairs, *det.DetSigns);
for (size_t idim = 0; idim < OHMMS_DIM; idim++)
{
det.ExtraStuffTimer.start();
det.dpsiMinv = det.psiMinv;
it = confgList[det.ReferenceDeterminant].occup.begin();
for (size_t i = 0; i < det_leader.NumPtcls; i++)
det.psiV_temp[i] = det.dpsiV[*(it++)][idim];
InverseUpdateByColumn(det.dpsiMinv, det.psiV_temp, det.workV1, det.workV2, WorkingIndex, ratioGradRef[idim]);
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.dpsiV[i][idim];
det.ExtraStuffTimer.stop();
det.BuildDotProductsAndCalculateRatios(det.ReferenceDeterminant, WorkingIndex, det.new_grads, det.dpsiMinv,
det.TpsiM, det.dotProducts, *det.detData, *det.uniquePairs,
*det.DetSigns, idim);
}
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.psiM(WorkingIndex, i);
}
}
}
void MultiDiracDeterminant::evaluateGrads(ParticleSet& P, int iat)
{
const int WorkingIndex = iat - FirstIndex;
@ -351,6 +606,138 @@ void MultiDiracDeterminant::evaluateGrads(ParticleSet& P, int iat)
}
}
void MultiDiracDeterminant::evaluateGradsWithSpin(ParticleSet& P, int iat)
{
assert(P.is_spinor_ == is_spinor_);
const int WorkingIndex = iat - FirstIndex;
assert(WorkingIndex >= 0 && WorkingIndex < LastIndex - FirstIndex);
const auto& confgList = *ciConfigList;
auto it = confgList[0].occup.begin(); //just to avoid using the type
//std::vector<size_t>::iterator it;
if (NumPtcls == 1)
{
std::vector<ci_configuration2>::const_iterator it(confgList.begin());
std::vector<ci_configuration2>::const_iterator last(confgList.end());
GradMatrix_t::iterator grad(grads.begin());
ValueMatrix_t::iterator spingrad(spingrads.begin());
while (it != last)
{
size_t orb = (it++)->occup[0];
*(grad++) = dpsiM(0, orb);
*(spingrad++) = dspin_psiM(0, orb);
}
}
else
{
for (size_t idim = 0; idim < OHMMS_DIM; idim++)
{
//dpsiMinv = psiMinv_temp;
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
ValueType ratioG = 0.0;
for (size_t i = 0; i < NumPtcls; i++)
{
psiV_temp[i] = dpsiM(WorkingIndex, *it)[idim];
ratioG += psiMinv(i, WorkingIndex) * dpsiM(WorkingIndex, *it)[idim];
it++;
}
grads(ReferenceDeterminant, WorkingIndex)[idim] = ratioG * detValues[ReferenceDeterminant];
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, WorkingIndex, ratioG);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = dpsiM(WorkingIndex, i)[idim];
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, WorkingIndex, grads, dpsiMinv, TpsiM, dotProducts,
*detData, *uniquePairs, *DetSigns, idim);
}
//Now compute the spin gradient, same procedure as normal gradient components above
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
ValueType ratioSG = 0.0;
for (size_t i = 0; i < NumPtcls; i++)
{
psiV_temp[i] = dspin_psiM(WorkingIndex, *it);
ratioSG += psiMinv(i, WorkingIndex) * dspin_psiM(WorkingIndex, *it);
it++;
}
spingrads(ReferenceDeterminant, WorkingIndex) = ratioSG * detValues[ReferenceDeterminant];
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, WorkingIndex, ratioSG);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = dspin_psiM(WorkingIndex, i);
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, WorkingIndex, spingrads, dpsiMinv, TpsiM, dotProducts,
*detData, *uniquePairs, *DetSigns);
// check comment above
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, WorkingIndex) = psiM(WorkingIndex, i);
}
}
void MultiDiracDeterminant::mw_evaluateGrads(const RefVectorWithLeader<MultiDiracDeterminant>& det_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat)
{
const int nw = det_list.size();
MultiDiracDeterminant& det_leader = det_list.getLeader();
det_leader.UpdateMode = ORB_PBYP_RATIO;
if (det_leader.NumPtcls == 1)
for (size_t iw = 0; iw < nw; iw++)
{
MultiDiracDeterminant& det = (det_list[iw]);
det.UpdateMode = ORB_PBYP_RATIO;
const int WorkingIndex = iat - det.FirstIndex;
const auto& confgList = *det.ciConfigList;
//auto it = confgList[0].occup.begin();
std::vector<ci_configuration2>::const_iterator it(confgList.begin());
std::vector<ci_configuration2>::const_iterator last(confgList.end());
GradMatrix_t::iterator gradval(det.grads.begin()); //using gardval instead of grads to avoid confusion
while (it != last)
{
size_t orb = (it++)->occup[0];
*(gradval++) = det.dpsiM(0, orb);
}
}
else
{
for (size_t iw = 0; iw < nw; iw++)
{
MultiDiracDeterminant& det = (det_list[iw]);
det.UpdateMode = ORB_PBYP_RATIO;
const int WorkingIndex = iat - det.FirstIndex;
const auto& confgList = *det.ciConfigList;
auto it = confgList[0].occup.begin();
for (size_t idim = 0; idim < OHMMS_DIM; idim++)
{
//dpsiMinv = psiMinv_temp;
det.dpsiMinv = det.psiMinv;
it = confgList[det.ReferenceDeterminant].occup.begin();
ValueType ratioG = 0.0;
for (size_t i = 0; i < det_leader.NumPtcls; i++)
{
det.psiV_temp[i] = det.dpsiM(WorkingIndex, *it)[idim];
ratioG += det.psiMinv(i, WorkingIndex) * det.dpsiM(WorkingIndex, *it)[idim];
it++;
}
det.grads(det.ReferenceDeterminant, WorkingIndex)[idim] = ratioG * det.detValues[det.ReferenceDeterminant];
InverseUpdateByColumn(det.dpsiMinv, det.psiV_temp, det.workV1, det.workV2, WorkingIndex, ratioG);
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.dpsiM(WorkingIndex, i)[idim];
det.BuildDotProductsAndCalculateRatios(det.ReferenceDeterminant, WorkingIndex, det.grads, det.dpsiMinv,
det.TpsiM, det.dotProducts, *det.detData, *det.uniquePairs,
*det.DetSigns, idim);
}
for (size_t i = 0; i < det_leader.NumOrbitals; i++)
det.TpsiM(i, WorkingIndex) = det.psiM(WorkingIndex, i);
}
}
}
void MultiDiracDeterminant::evaluateAllForPtclMove(const ParticleSet& P, int iat)
{
UpdateMode = ORB_PBYP_ALL;
@ -480,5 +867,4 @@ void MultiDiracDeterminant::evaluateAllForPtclMove(const ParticleSet& P, int iat
}
}
} // namespace qmcplusplus

View File

@ -189,12 +189,127 @@ void MultiDiracDeterminant::evaluateForWalkerMove(const ParticleSet& P, bool fro
evalWTimer.stop();
}
void MultiDiracDeterminant::evaluateForWalkerMoveWithSpin(const ParticleSet& P, bool fromScratch)
{
evalWTimer.start();
if (fromScratch)
Phi->evaluate_notranspose_spin(P, FirstIndex, LastIndex, psiM, dpsiM, d2psiM, dspin_psiM);
if (NumPtcls == 1)
{
//APP_ABORT("Evaluate Log with 1 particle in MultiDiracDeterminant is potentially dangerous. Fix later");
std::vector<ci_configuration2>::const_iterator it(ciConfigList->begin());
std::vector<ci_configuration2>::const_iterator last(ciConfigList->end());
ValueVector_t::iterator det(detValues.begin());
ValueMatrix_t::iterator lap(lapls.begin());
GradMatrix_t::iterator grad(grads.begin());
ValueMatrix_t::iterator spingrad(spingrads.begin());
while (it != last)
{
int orb = (it++)->occup[0];
*(det++) = psiM(0, orb);
*(lap++) = d2psiM(0, orb);
*(grad++) = dpsiM(0, orb);
*(spingrad++) = dspin_psiM(0, orb);
}
}
else
{
InverseTimer.start();
const auto& confgList = *ciConfigList;
//std::vector<int>::iterator it(confgList[ReferenceDeterminant].occup.begin());
auto it(confgList[ReferenceDeterminant].occup.begin());
for (size_t i = 0; i < NumPtcls; i++)
{
for (size_t j = 0; j < NumPtcls; j++)
psiMinv(j, i) = psiM(j, *it);
it++;
}
for (size_t i = 0; i < NumPtcls; i++)
{
for (size_t j = 0; j < NumOrbitals; j++)
TpsiM(j, i) = psiM(i, j);
}
std::complex<RealType> logValueRef;
InvertWithLog(psiMinv.data(), NumPtcls, NumPtcls, WorkSpace.data(), Pivot.data(), logValueRef);
InverseTimer.stop();
const RealType detsign = (*DetSigns)[ReferenceDeterminant];
const ValueType det0 = LogToValue<ValueType>::convert(logValueRef);
detValues[ReferenceDeterminant] = det0;
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, 0, detValues, psiMinv, TpsiM, dotProducts, *detData,
*uniquePairs, *DetSigns);
for (size_t iat = 0; iat < NumPtcls; iat++)
{
it = confgList[ReferenceDeterminant].occup.begin();
GradType gradRatio;
ValueType ratioLapl = 0.0;
ValueType spingradRatio = 0.0;
for (size_t i = 0; i < NumPtcls; i++)
{
gradRatio += psiMinv(i, iat) * dpsiM(iat, *it);
ratioLapl += psiMinv(i, iat) * d2psiM(iat, *it);
spingradRatio += psiMinv(i, iat) * dspin_psiM(iat, *it);
it++;
}
grads(ReferenceDeterminant, iat) = det0 * gradRatio;
lapls(ReferenceDeterminant, iat) = det0 * ratioLapl;
spingrads(ReferenceDeterminant, iat) = det0 * spingradRatio;
for (size_t idim = 0; idim < OHMMS_DIM; idim++)
{
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
for (size_t i = 0; i < NumPtcls; i++)
psiV_temp[i] = dpsiM(iat, *(it++))[idim];
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, iat, gradRatio[idim]);
//MultiDiracDeterminant::InverseUpdateByColumn_GRAD(dpsiMinv,dpsiV,workV1,workV2,iat,gradRatio[idim],idim);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, iat) = dpsiM(iat, i)[idim];
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, iat, grads, dpsiMinv, TpsiM, dotProducts, *detData,
*uniquePairs, *DetSigns, idim);
}
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
for (size_t i = 0; i < NumPtcls; i++)
psiV_temp[i] = d2psiM(iat, *(it++));
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, iat, ratioLapl);
//MultiDiracDeterminant::InverseUpdateByColumn(dpsiMinv,d2psiM,workV1,workV2,iat,ratioLapl,confgList[ReferenceDeterminant].occup.begin());
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, iat) = d2psiM(iat, i);
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, iat, lapls, dpsiMinv, TpsiM, dotProducts, *detData,
*uniquePairs, *DetSigns);
//Adding the spin gradient
dpsiMinv = psiMinv;
it = confgList[ReferenceDeterminant].occup.begin();
for (size_t i = 0; i < NumPtcls; i++)
psiV_temp[i] = dspin_psiM(iat, *(it++));
InverseUpdateByColumn(dpsiMinv, psiV_temp, workV1, workV2, iat, spingradRatio);
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, iat) = dspin_psiM(iat, i);
BuildDotProductsAndCalculateRatios(ReferenceDeterminant, iat, spingrads, dpsiMinv, TpsiM, dotProducts, *detData,
*uniquePairs, *DetSigns);
// restore matrix
for (size_t i = 0; i < NumOrbitals; i++)
TpsiM(i, iat) = psiM(iat, i);
}
} // NumPtcls==1
psiMinv_temp = psiMinv;
evalWTimer.stop();
}
MultiDiracDeterminant::LogValueType MultiDiracDeterminant::updateBuffer(ParticleSet& P,
WFBufferType& buf,
bool fromscratch)
{
evaluateForWalkerMove(P, (fromscratch || UpdateMode == ORB_PBYP_RATIO));
assert(P.is_spinor_ == is_spinor_);
if (is_spinor_)
evaluateForWalkerMoveWithSpin(P, (fromscratch || UpdateMode == ORB_PBYP_RATIO));
else
evaluateForWalkerMove(P, (fromscratch || UpdateMode == ORB_PBYP_RATIO));
buf.put(psiM.first_address(), psiM.last_address());
buf.put(FirstAddressOfdpsiM, LastAddressOfdpsiM);
buf.put(d2psiM.first_address(), d2psiM.last_address());
@ -202,11 +317,17 @@ MultiDiracDeterminant::LogValueType MultiDiracDeterminant::updateBuffer(Particle
buf.put(detValues.first_address(), detValues.last_address());
buf.put(FirstAddressOfGrads, LastAddressOfGrads);
buf.put(lapls.first_address(), lapls.last_address());
if (is_spinor_)
{
buf.put(dspin_psiM.first_address(), dspin_psiM.last_address());
buf.put(spingrads.first_address(), spingrads.last_address());
}
return 1.0;
}
void MultiDiracDeterminant::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
{
assert(P.is_spinor_ == is_spinor_);
buf.get(psiM.first_address(), psiM.last_address());
buf.get(FirstAddressOfdpsiM, LastAddressOfdpsiM);
buf.get(d2psiM.first_address(), d2psiM.last_address());
@ -214,6 +335,11 @@ void MultiDiracDeterminant::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
buf.get(detValues.first_address(), detValues.last_address());
buf.get(FirstAddressOfGrads, LastAddressOfGrads);
buf.get(lapls.first_address(), lapls.last_address());
if (is_spinor_)
{
buf.get(dspin_psiM.first_address(), dspin_psiM.last_address());
buf.get(spingrads.first_address(), spingrads.last_address());
}
// only used with ORB_PBYP_ALL,
psiMinv_temp = psiMinv;
int n1 = psiM.extent(0);
@ -229,6 +355,7 @@ void MultiDiracDeterminant::acceptMove(ParticleSet& P, int iat, bool safe_to_del
{
const int WorkingIndex = iat - FirstIndex;
assert(WorkingIndex >= 0 && WorkingIndex < LastIndex - FirstIndex);
assert(P.is_spinor_ == is_spinor_);
switch (UpdateMode)
{
case ORB_PBYP_RATIO:
@ -249,6 +376,8 @@ void MultiDiracDeterminant::acceptMove(ParticleSet& P, int iat, bool safe_to_del
std::copy(psiV.begin(), psiV.end(), psiM[WorkingIndex]);
std::copy(dpsiV.begin(), dpsiV.end(), dpsiM[WorkingIndex]);
std::copy(d2psiV.begin(), d2psiV.end(), d2psiM[WorkingIndex]);
if (is_spinor_)
std::copy(dspin_psiV.begin(), dspin_psiV.end(), dspin_psiM[WorkingIndex]);
break;
default:
psiMinv = psiMinv_temp;
@ -260,6 +389,11 @@ void MultiDiracDeterminant::acceptMove(ParticleSet& P, int iat, bool safe_to_del
std::copy(psiV.begin(), psiV.end(), psiM[WorkingIndex]);
std::copy(dpsiV.begin(), dpsiV.end(), dpsiM[WorkingIndex]);
std::copy(d2psiV.begin(), d2psiV.end(), d2psiM[WorkingIndex]);
if (is_spinor_)
{
std::copy(new_spingrads.begin(), new_spingrads.end(), spingrads.begin());
std::copy(dspin_psiV.begin(), dspin_psiV.end(), dspin_psiM[WorkingIndex]);
}
break;
}
}
@ -297,6 +431,7 @@ MultiDiracDeterminant::MultiDiracDeterminant(const MultiDiracDeterminant& s)
: WaveFunctionComponent(s),
UpdateTimer(*timer_manager.createTimer(ClassName + "::update")),
RatioTimer(*timer_manager.createTimer(ClassName + "::ratio")),
MWRatioTimer(*timer_manager.createTimer(ClassName + "::mwratio")),
InverseTimer(*timer_manager.createTimer(ClassName + "::inverse")),
buildTableTimer(*timer_manager.createTimer(ClassName + "::buildTable")),
readMatTimer(*timer_manager.createTimer(ClassName + "::readMat")),
@ -305,13 +440,14 @@ MultiDiracDeterminant::MultiDiracDeterminant(const MultiDiracDeterminant& s)
evalOrb1Timer(*timer_manager.createTimer(ClassName + "::evalOrbGrad")),
readMatGradTimer(*timer_manager.createTimer(ClassName + "::readMatGrad")),
buildTableGradTimer(*timer_manager.createTimer(ClassName + "::buildTableGrad")),
ExtraStuffTimer(*timer_manager.createTimer(ClassName + "::ExtraStuff")),
ExtraStuffTimer(*timer_manager.createTimer(ClassName + "::RefDetInvUpdate")),
Phi(s.Phi->makeClone()),
NumOrbitals(Phi->getOrbitalSetSize()),
NP(0),
FirstIndex(s.FirstIndex),
ciConfigList(s.ciConfigList),
ReferenceDeterminant(s.ReferenceDeterminant),
is_spinor_(s.is_spinor_),
detData(s.detData),
uniquePairs(s.uniquePairs),
DetSigns(s.DetSigns)
@ -333,11 +469,13 @@ WaveFunctionComponentPtr MultiDiracDeterminant::makeClone(ParticleSet& tqp) cons
/** constructor
*@param spos the single-particle orbital set
*@param first index of the first particle
*@param spinor flag to determinane if spin arrays need to be resized and used
*/
MultiDiracDeterminant::MultiDiracDeterminant(std::unique_ptr<SPOSet>&& spos, int first)
MultiDiracDeterminant::MultiDiracDeterminant(std::unique_ptr<SPOSet>&& spos, bool spinor)
: WaveFunctionComponent("MultiDiracDeterminant"),
UpdateTimer(*timer_manager.createTimer(ClassName + "::update")),
RatioTimer(*timer_manager.createTimer(ClassName + "::ratio")),
MWRatioTimer(*timer_manager.createTimer(ClassName + "::mwratio")),
InverseTimer(*timer_manager.createTimer(ClassName + "::inverse")),
buildTableTimer(*timer_manager.createTimer(ClassName + "::buildTable")),
readMatTimer(*timer_manager.createTimer(ClassName + "::readMat")),
@ -346,12 +484,13 @@ MultiDiracDeterminant::MultiDiracDeterminant(std::unique_ptr<SPOSet>&& spos, int
evalOrb1Timer(*timer_manager.createTimer(ClassName + "::evalOrbGrad")),
readMatGradTimer(*timer_manager.createTimer(ClassName + "::readMatGrad")),
buildTableGradTimer(*timer_manager.createTimer(ClassName + "::buildTableGrad")),
ExtraStuffTimer(*timer_manager.createTimer(ClassName + "::ExtraStuff")),
ExtraStuffTimer(*timer_manager.createTimer(ClassName + "::RefDetInvUpdate")),
Phi(std::move(spos)),
NumOrbitals(Phi->getOrbitalSetSize()),
NP(0),
FirstIndex(first),
ReferenceDeterminant(0)
FirstIndex(-1),
ReferenceDeterminant(0),
is_spinor_(spinor)
{
(Phi->isOptimizable() == true) ? Optimizable = true : Optimizable = false;
@ -368,6 +507,7 @@ MultiDiracDeterminant::~MultiDiracDeterminant() = default;
void MultiDiracDeterminant::registerData(ParticleSet& P, WFBufferType& buf)
{
assert(P.is_spinor_ == is_spinor_);
if (NP == 0)
//first time, allocate once
{
@ -378,7 +518,10 @@ void MultiDiracDeterminant::registerData(ParticleSet& P, WFBufferType& buf)
FirstAddressOfdpsiM = &(dpsiM(0, 0)[0]); //(*dpsiM.begin())[0]);
LastAddressOfdpsiM = FirstAddressOfdpsiM + NumPtcls * NumOrbitals * DIM;
}
evaluateForWalkerMove(P, true);
if (is_spinor_)
evaluateForWalkerMoveWithSpin(P, true);
else
evaluateForWalkerMove(P, true);
//add the data:
buf.add(psiM.first_address(), psiM.last_address());
buf.add(FirstAddressOfdpsiM, LastAddressOfdpsiM);
@ -387,6 +530,11 @@ void MultiDiracDeterminant::registerData(ParticleSet& P, WFBufferType& buf)
buf.add(detValues.first_address(), detValues.last_address());
buf.add(FirstAddressOfGrads, LastAddressOfGrads);
buf.add(lapls.first_address(), lapls.last_address());
if (is_spinor_)
{
buf.add(dspin_psiM.first_address(), dspin_psiM.last_address());
buf.add(spingrads.first_address(), spingrads.last_address());
}
}
@ -427,12 +575,21 @@ void MultiDiracDeterminant::resize(int nel)
new_lapls.resize(NumDets, nel);
dotProducts.resize(NumOrbitals, NumOrbitals);
DetCalculator.resize(nel);
if (is_spinor_)
{
dspin_psiV.resize(NumOrbitals);
dspin_psiM.resize(nel, NumOrbitals);
spingrads.resize(NumDets, nel);
new_spingrads.resize(NumDets, nel);
}
}
void MultiDiracDeterminant::registerTimers()
{
UpdateTimer.reset();
RatioTimer.reset();
MWRatioTimer.reset();
InverseTimer.reset();
buildTableTimer.reset();
readMatTimer.reset();

View File

@ -35,10 +35,10 @@ class MultiDiracDeterminant : public WaveFunctionComponent
public:
bool Optimizable;
void registerTimers();
NewTimer &UpdateTimer, &RatioTimer, &InverseTimer, &buildTableTimer, &readMatTimer, &evalWTimer, &evalOrbTimer,
&evalOrb1Timer;
NewTimer &UpdateTimer, &RatioTimer, &MWRatioTimer, &InverseTimer, &buildTableTimer, &readMatTimer, &evalWTimer,
&evalOrbTimer, &evalOrb1Timer;
NewTimer &readMatGradTimer, &buildTableGradTimer, &ExtraStuffTimer;
// Optimizable parameters
// Optimizable parameter
opt_variables_type myVars;
typedef SPOSet::IndexVector_t IndexVector_t;
@ -56,7 +56,7 @@ public:
*@param spos the single-particle orbital set
*@param first index of the first particle
*/
MultiDiracDeterminant(std::unique_ptr<SPOSet>&& spos, int first = 0);
MultiDiracDeterminant(std::unique_ptr<SPOSet>&& spos, bool spinor);
///default destructor
~MultiDiracDeterminant();
@ -369,16 +369,35 @@ public:
*@param refPtcl if given, the id of the reference particle in virtual moves
*/
void evaluateDetsForPtclMove(const ParticleSet& P, int iat, int refPtcl = -1);
/// multi walker version of evaluateDetsForPtclMove
void static mw_evaluateDetsForPtclMove(const RefVectorWithLeader<MultiDiracDeterminant>& det_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat);
/// evaluate the value and gradients of all the unique determinants with one electron moved. Used by the table method
void evaluateDetsAndGradsForPtclMove(const ParticleSet& P, int iat);
/// multi walker version of mw_evaluateDetsAndGradsForPtclMove
void static mw_evaluateDetsAndGradsForPtclMove(const RefVectorWithLeader<MultiDiracDeterminant>& det_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat);
/// evaluate the value and gradients of all the unique determinants with one electron moved. Used by the table method. Includes Spin Gradient data
void evaluateDetsAndGradsForPtclMoveWithSpin(const ParticleSet& P, int iat);
/// evaluate the gradients of all the unique determinants with one electron moved. Used by the table method
void evaluateGrads(ParticleSet& P, int iat);
/// multi walker version of mw_evaluateGrads
void static mw_evaluateGrads(const RefVectorWithLeader<MultiDiracDeterminant>& det_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat);
/// evaluate the gradients of all the unique determinants with one electron moved. Used by the table method. Includes Spin Gradient data
void evaluateGradsWithSpin(ParticleSet& P, int iat);
void evaluateAllForPtclMove(const ParticleSet& P, int iat);
// full evaluation of all the structures from scratch, used in evaluateLog for example
void evaluateForWalkerMove(const ParticleSet& P, bool fromScratch = true);
// full evaluation of all the structures from scratch, used in evaluateLog for example. Includes spin gradients for spin moves
void evaluateForWalkerMoveWithSpin(const ParticleSet& P, bool fromScratch = true);
// accessors
inline int getNumDets() const { return ciConfigList->size(); }
@ -390,6 +409,8 @@ public:
ValueVector_t detValues, new_detValues;
GradMatrix_t grads, new_grads;
ValueMatrix_t lapls, new_lapls;
// additional storage for spin derivatives. Only resized if the calculation uses spinors
ValueMatrix_t spingrads, new_spingrads;
private:
///reset the size: with the number of particles
@ -413,6 +434,8 @@ private:
// if its value is zero, then use a data from backup, but always use this one
// by default
int ReferenceDeterminant;
// flag to determine if spin arrays need to be resized and used. Set by ParticleSet::is_spinor_ in SlaterDetBuilder
const bool is_spinor_;
/// psiM(i,j) \f$= \psi_j({\bf r}_i)\f$
/// TpsiM(i,j) \f$= psiM(j,i) \f$
@ -423,12 +446,18 @@ private:
ValueMatrix_t dpsiMinv;
/// d2psiM(i,j) \f$= \nabla_i^2 \psi_j({\bf r}_i)\f$
ValueMatrix_t d2psiM;
/* dspin_psiM(i,j) \f$= \partial_{s_i} \psi_j({\bf r}_i,s_i)\f$ where \f$s_i\f$s is the spin variable
* This is only resized if a spinor calculation is used
*/
ValueMatrix_t dspin_psiM;
/// value of single-particle orbital for particle-by-particle update
ValueVector_t psiV, psiV_temp;
GradVector_t dpsiV;
ValueVector_t d2psiV;
ValueVector_t workV1, workV2;
//spin derivative of single-particle orbitals. Only resized if a spinor calculation
ValueVector_t dspin_psiV;
ValueMatrix_t dotProducts;

View File

@ -24,8 +24,13 @@ MultiSlaterDeterminantFast::MultiSlaterDeterminantFast(ParticleSet& targetPtcl,
bool use_pre_computing)
: WaveFunctionComponent("MultiSlaterDeterminantFast"),
RatioTimer(*timer_manager.createTimer(ClassName + "::ratio")),
MWRatioTimer(*timer_manager.createTimer(ClassName + "::mwratio")),
OffloadRatioTimer(*timer_manager.createTimer(ClassName + "::offloadRatio")),
OffloadGradTimer(*timer_manager.createTimer(ClassName + "::offloadGrad")),
EvalGradTimer(*timer_manager.createTimer(ClassName + "::evalGrad")),
MWEvalGradTimer(*timer_manager.createTimer(ClassName + "::mwevalGrad")),
RatioGradTimer(*timer_manager.createTimer(ClassName + "::ratioGrad")),
MWRatioGradTimer(*timer_manager.createTimer(ClassName + "::mwratioGrad")),
PrepareGroupTimer(*timer_manager.createTimer(ClassName + "::prepareGroup")),
UpdateTimer(*timer_manager.createTimer(ClassName + "::updateBuffer")),
AccRejTimer(*timer_manager.createTimer(ClassName + "::Accept_Reject")),
@ -197,7 +202,12 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::evaluate(const P
{
ScopedTimer local_timer(EvaluateTimer);
for (size_t id = 0; id < Dets.size(); id++)
Dets[id]->evaluateForWalkerMove(P);
{
if (P.is_spinor_)
Dets[id]->evaluateForWalkerMoveWithSpin(P);
else
Dets[id]->evaluateForWalkerMove(P);
}
psiCurrent = evaluate_vgl_impl(P, myG, myL);
@ -240,6 +250,138 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::evalGrad_impl(Pa
return psi;
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::evalGradWithSpin_impl(ParticleSet& P,
int iat,
bool newpos,
GradType& g_at,
ComplexType& sg_at)
{
const int det_id = getDetID(iat);
if (newpos)
Dets[det_id]->evaluateDetsAndGradsForPtclMoveWithSpin(P, iat);
else
Dets[det_id]->evaluateGradsWithSpin(P, iat);
const GradMatrix_t& grads = (newpos) ? Dets[det_id]->new_grads : Dets[det_id]->grads;
const ValueType* restrict detValues0 = (newpos) ? Dets[det_id]->new_detValues.data() : Dets[det_id]->detValues.data();
const ValueMatrix_t& spingrads = (newpos) ? Dets[det_id]->new_spingrads : Dets[det_id]->spingrads;
const size_t noffset = Dets[det_id]->getFirstIndex();
PsiValueType psi(0);
for (size_t i = 0; i < Dets[det_id]->getNumDets(); i++)
{
psi += detValues0[i] * C_otherDs[det_id][i];
g_at += C_otherDs[det_id][i] * grads(i, iat - noffset);
sg_at += C_otherDs[det_id][i] * spingrads(i, iat - noffset);
}
return psi;
}
void MultiSlaterDeterminantFast::mw_evalGrad_impl(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
bool newpos,
std::vector<GradType>& grad_now,
std::vector<PsiValueType>& psi_list)
{
auto& det_leader = WFC_list.getCastedLeader<MultiSlaterDeterminantFast>();
auto& det_value_ptr_list = det_leader.det_value_ptr_list;
auto& C_otherDs_ptr_list = det_leader.C_otherDs_ptr_list;
const int det_id = det_leader.getDetID(iat);
const int nw = WFC_list.size();
const int ndets = det_leader.Dets[det_id]->getNumDets();
RefVectorWithLeader<MultiDiracDeterminant> det_list(*det_leader.Dets[det_id]);
det_list.reserve(WFC_list.size());
ScopedTimer local_timer(det_leader.MWEvalGradTimer);
for (int iw = 0; iw < WFC_list.size(); iw++)
{
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
det_list.push_back(*det.Dets[det_id]);
if (newpos)
det.Dets[det_id]->evaluateDetsAndGradsForPtclMove(P_list[iw], iat);
}
if (!newpos)
det_leader.Dets[det_id]->mw_evaluateGrads(det_list, P_list, iat);
det_value_ptr_list.resize(nw);
C_otherDs_ptr_list.resize(nw);
//Data layout change for grads function
Matrix<ValueType, OMPallocator<ValueType, PinnedAlignedAllocator<ValueType>>> Grads_copy;
Grads_copy.resize(3 * nw, ndets);
for (size_t iw = 0; iw < nw; iw++)
{
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
const size_t noffset = det.Dets[det_id]->getFirstIndex();
GradMatrix_t& grads = (newpos) ? det.Dets[det_id]->new_grads : det.Dets[det_id]->grads;
for (size_t i = 0; i < ndets; i++)
{
Grads_copy[3 * iw + 0][i] = grads(i, iat - noffset)[0];
Grads_copy[3 * iw + 1][i] = grads(i, iat - noffset)[1];
Grads_copy[3 * iw + 2][i] = grads(i, iat - noffset)[2];
}
ValueType* restrict detValues0 =
(newpos) ? det.Dets[det_id]->new_detValues.data() : det.Dets[det_id]->detValues.data();
// allocate device memory and transfer content to device
PRAGMA_OFFLOAD("omp target enter data map(to : detValues0[:ndets])")
det_value_ptr_list[iw] = getOffloadDevicePtr(detValues0);
//Put C_otherDs in host
C_otherDs_ptr_list[iw] = det.C_otherDs[det_id].device_data();
}
std::vector<ValueType> grad_now_list(nw * 3, 0);
auto* grad_now_list_ptr = grad_now_list.data();
auto* Grads_copy_ptr = Grads_copy.data();
auto* psi_list_ptr = psi_list.data();
auto* C_otherDs_ptr_list_ptr = C_otherDs_ptr_list.data();
auto* det_value_ptr_list_ptr = det_value_ptr_list.data();
{
ScopedTimer local_timer(det_leader.OffloadGradTimer);
PRAGMA_OFFLOAD("omp target teams distribute map(from: psi_list_ptr[:nw]) \
map(from: grad_now_list_ptr[:3 * nw]) \
map(always, to: det_value_ptr_list_ptr[:nw], C_otherDs_ptr_list_ptr[:nw]) \
map(always, to: Grads_copy_ptr[:Grads_copy.size()])")
for (size_t iw = 0; iw < nw; iw++)
{
PsiValueType psi_local(0);
ValueType grad_local_x(0);
ValueType grad_local_y(0);
ValueType grad_local_z(0);
PRAGMA_OFFLOAD("omp parallel for reduction(+:psi_local, grad_local_x, grad_local_y, grad_local_z)")
for (size_t i = 0; i < ndets; i++)
{
psi_local += det_value_ptr_list_ptr[iw][i] * C_otherDs_ptr_list_ptr[iw][i];
grad_local_x += C_otherDs_ptr_list_ptr[iw][i] * Grads_copy_ptr[(3 * iw + 0) * ndets + i];
grad_local_y += C_otherDs_ptr_list_ptr[iw][i] * Grads_copy_ptr[(3 * iw + 1) * ndets + i];
grad_local_z += C_otherDs_ptr_list_ptr[iw][i] * Grads_copy_ptr[(3 * iw + 2) * ndets + i];
}
psi_list_ptr[iw] = psi_local;
grad_now_list_ptr[iw * 3 + 0] = grad_local_x;
grad_now_list_ptr[iw * 3 + 1] = grad_local_y;
grad_now_list_ptr[iw * 3 + 2] = grad_local_z;
}
}
for (size_t iw = 0; iw < nw; iw++)
{
grad_now[iw][0] = grad_now_list[iw * 3 + 0];
grad_now[iw][1] = grad_now_list[iw * 3 + 1];
grad_now[iw][2] = grad_now_list[iw * 3 + 2];
//Free Memory
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
ValueType* restrict detValues0 =
(newpos) ? det.Dets[det_id]->new_detValues.data() : det.Dets[det_id]->detValues.data();
PRAGMA_OFFLOAD("omp target exit data map(delete : detValues0[:ndets])") //free memory on device
}
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::evalGrad_impl_no_precompute(ParticleSet& P,
int iat,
bool newpos,
@ -275,9 +417,47 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::evalGrad_impl_no
return psi;
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::evalGradWithSpin_impl_no_precompute(ParticleSet& P,
int iat,
bool newpos,
GradType& g_at,
ComplexType& sg_at)
{
const int det_id = getDetID(iat);
if (newpos)
Dets[det_id]->evaluateDetsAndGradsForPtclMoveWithSpin(P, iat);
else
Dets[det_id]->evaluateGradsWithSpin(P, iat);
const GradMatrix_t& grads = (newpos) ? Dets[det_id]->new_grads : Dets[det_id]->grads;
const ValueType* restrict detValues0 = (newpos) ? Dets[det_id]->new_detValues.data() : Dets[det_id]->detValues.data();
const ValueMatrix_t& spingrads = (newpos) ? Dets[det_id]->new_spingrads : Dets[det_id]->spingrads;
const size_t* restrict det0 = (*C2node)[det_id].data();
const ValueType* restrict cptr = C->data();
const size_t nc = C->size();
const size_t noffset = Dets[det_id]->getFirstIndex();
PsiValueType psi(0);
for (size_t i = 0; i < nc; ++i)
{
const size_t d0 = det0[i];
//const size_t d1=det1[i];
//psi += cptr[i]*detValues0[d0] * detValues1[d1];
//g_at += cptr[i]*grads(d0,iat-noffset) * detValues1[d1];
ValueType t = cptr[i];
for (size_t id = 0; id < Dets.size(); id++)
if (id != det_id)
t *= Dets[id]->detValues[(*C2node)[id][i]];
psi += t * detValues0[d0];
g_at += t * grads(d0, iat - noffset);
sg_at += t * spingrads(d0, iat - noffset);
}
return psi;
}
WaveFunctionComponent::GradType MultiSlaterDeterminantFast::evalGrad(ParticleSet& P, int iat)
{
BackFlowStopper("Fast MSD+BF: evalGrad\n");
BackFlowStopper("Fast MSD+BF: evalGrad");
ScopedTimer local_timer(EvalGradTimer);
@ -292,9 +472,51 @@ WaveFunctionComponent::GradType MultiSlaterDeterminantFast::evalGrad(ParticleSet
return grad_iat;
}
WaveFunctionComponent::GradType MultiSlaterDeterminantFast::evalGradWithSpin(ParticleSet& P,
int iat,
ComplexType& spingrad)
{
BackFlowStopper("Fast MSD+BF: evalGrad");
ScopedTimer local_timer(EvalGradTimer);
GradType grad_iat;
PsiValueType psi;
ComplexType spingrad_iat;
if (use_pre_computing_)
psi = evalGradWithSpin_impl(P, iat, false, grad_iat, spingrad_iat);
else
psi = evalGradWithSpin_impl_no_precompute(P, iat, false, grad_iat, spingrad_iat);
grad_iat *= (PsiValueType(1.0) / psi);
spingrad += spingrad_iat / static_cast<ComplexType>(psi);
return grad_iat;
}
void MultiSlaterDeterminantFast::mw_evalGrad(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
std::vector<GradType>& grad_now) const
{
BackFlowStopper("Fast MSD+BF: mw_evalGrad");
if (!use_pre_computing_)
{
WaveFunctionComponent::mw_evalGrad(WFC_list, P_list, iat, grad_now);
return;
}
const int nw = WFC_list.size();
std::vector<PsiValueType> psi_list(nw, 0);
mw_evalGrad_impl(WFC_list, P_list, iat, false, grad_now, psi_list);
for (size_t iw = 0; iw < nw; iw++)
grad_now[iw] *= static_cast<ValueType>(PsiValueType(1.0) / psi_list[iw]);
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratioGrad(ParticleSet& P, int iat, GradType& grad_iat)
{
BackFlowStopper("Fast MSD+BF: ratioGrad\n");
BackFlowStopper("Fast MSD+BF: ratioGrad");
ScopedTimer local_timer(RatioGradTimer);
UpdateMode = ORB_PBYP_PARTIAL;
@ -311,6 +533,64 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratioGrad(Partic
return curRatio;
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratioGradWithSpin(ParticleSet& P,
int iat,
GradType& grad_iat,
ComplexType& spingrad_iat)
{
BackFlowStopper("Fast MSD+BF: ratioGrad");
ScopedTimer local_timer(RatioGradTimer);
UpdateMode = ORB_PBYP_PARTIAL;
GradType dummy;
ComplexType spindummy;
PsiValueType psiNew;
if (use_pre_computing_)
psiNew = evalGradWithSpin_impl(P, iat, true, dummy, spindummy);
else
psiNew = evalGradWithSpin_impl_no_precompute(P, iat, true, dummy, spindummy);
grad_iat += static_cast<ValueType>(PsiValueType(1.0) / psiNew) * dummy;
spingrad_iat += static_cast<ValueType>(PsiValueType(1.0) / psiNew) * spindummy;
curRatio = psiNew / psiCurrent;
return curRatio;
}
void MultiSlaterDeterminantFast::mw_ratioGrad(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
std::vector<WaveFunctionComponent::PsiValueType>& ratios,
std::vector<GradType>& grad_new) const
{
BackFlowStopper("Fast MSD+BF: mw_ratioGrad");
if (!use_pre_computing_)
{
WaveFunctionComponent::mw_ratioGrad(WFC_list, P_list, iat, ratios, grad_new);
return;
}
auto& det_leader = WFC_list.getCastedLeader<MultiSlaterDeterminantFast>();
auto& det_value_ptr_list = det_leader.det_value_ptr_list;
auto& C_otherDs_ptr_list = det_leader.C_otherDs_ptr_list;
const int nw = WFC_list.size();
ScopedTimer local_timer(det_leader.MWRatioGradTimer);
std::vector<PsiValueType> psi_list(nw, 0);
std::vector<GradType> dummy;
dummy.resize(nw);
mw_evalGrad_impl(WFC_list, P_list, iat, true, dummy, psi_list);
for (size_t iw = 0; iw < nw; iw++)
{
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
grad_new[iw] += static_cast<ValueType>(PsiValueType(1.0) / psi_list[iw]) * dummy[iw];
ratios[iw] = det.curRatio = psi_list[iw] / det.psiCurrent;
}
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratio_impl(ParticleSet& P, int iat)
{
const int det_id = getDetID(iat);
@ -330,6 +610,7 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratio_impl(Parti
return psi;
}
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratio_impl_no_precompute(ParticleSet& P, int iat)
{
const int det_id = getDetID(iat);
@ -356,7 +637,7 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratio_impl_no_pr
// use ci_node for this routine only
WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratio(ParticleSet& P, int iat)
{
BackFlowStopper("Fast MSD+BF: ratio\n");
BackFlowStopper("Fast MSD+BF: ratio");
ScopedTimer local_timer(RatioTimer);
UpdateMode = ORB_PBYP_RATIO;
@ -371,9 +652,84 @@ WaveFunctionComponent::PsiValueType MultiSlaterDeterminantFast::ratio(ParticleSe
return curRatio;
}
void MultiSlaterDeterminantFast::mw_calcRatio(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
std::vector<PsiValueType>& ratios) const
{
BackFlowStopper("Fast MSD+BF: mw_calcRatio");
if (!use_pre_computing_)
{
WaveFunctionComponent::mw_calcRatio(WFC_list, P_list, iat, ratios);
return;
}
const int det_id = getDetID(iat);
const int nw = WFC_list.size();
auto& det_leader = WFC_list.getCastedLeader<MultiSlaterDeterminantFast>();
auto& det_value_ptr_list = det_leader.det_value_ptr_list;
auto& C_otherDs_ptr_list = det_leader.C_otherDs_ptr_list;
const int ndets = det_leader.Dets[det_id]->getNumDets();
ScopedTimer local_timer(det_leader.MWRatioTimer);
RefVectorWithLeader<MultiDiracDeterminant> det_list(*det_leader.Dets[det_id]);
det_list.reserve(WFC_list.size());
for (int iw = 0; iw < WFC_list.size(); iw++)
{
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
det_list.push_back(*det.Dets[det_id]);
}
det_leader.Dets[det_id]->mw_evaluateDetsForPtclMove(det_list, P_list, iat);
det_value_ptr_list.resize(nw);
C_otherDs_ptr_list.resize(nw);
for (size_t iw = 0; iw < nw; iw++)
{
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
det.UpdateMode = ORB_PBYP_RATIO;
ValueType* restrict detValues0 = det.Dets[det_id]->new_detValues.data(); //always new
// allocate device memory and transfer content to device
PRAGMA_OFFLOAD("omp target enter data map(to : detValues0[:ndets])")
det_value_ptr_list[iw] = getOffloadDevicePtr(detValues0);
C_otherDs_ptr_list[iw] = det.C_otherDs[det_id].device_data();
}
std::vector<PsiValueType> psi_list(nw, 0);
auto* psi_list_ptr = psi_list.data();
auto* C_otherDs_ptr_list_ptr = C_otherDs_ptr_list.data();
auto* det_value_ptr_list_ptr = det_value_ptr_list.data();
OffloadRatioTimer.start();
PRAGMA_OFFLOAD("omp target teams distribute map(from: psi_list_ptr[:nw]) \
map(always, to: det_value_ptr_list_ptr[:nw], C_otherDs_ptr_list_ptr[:nw])")
for (size_t iw = 0; iw < nw; iw++)
{
PsiValueType psi_local(0);
PRAGMA_OFFLOAD("omp parallel for reduction(+ : psi_local)")
for (size_t i = 0; i < ndets; i++)
{
psi_local += det_value_ptr_list_ptr[iw][i] * C_otherDs_ptr_list_ptr[iw][i];
}
psi_list_ptr[iw] = psi_local;
}
OffloadRatioTimer.stop();
for (size_t iw = 0; iw < nw; iw++)
{
auto& det = WFC_list.getCastedElement<MultiSlaterDeterminantFast>(iw);
ratios[iw] = det.curRatio = psi_list[iw] / det.psiCurrent;
ValueType* restrict detValues0 = det.Dets[det_id]->new_detValues.data(); //always new
PRAGMA_OFFLOAD("omp target exit data map(delete : detValues0[:ndets])") //free memory on device.
}
}
void MultiSlaterDeterminantFast::evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios)
{
BackFlowStopper("Fast MSD+BF: evaluateRatios\n");
BackFlowStopper("Fast MSD+BF: evaluateRatios");
ScopedTimer local_timer(RatioTimer);
const int det_id = getDetID(VP.refPtcl);
@ -411,7 +767,7 @@ void MultiSlaterDeterminantFast::acceptMove(ParticleSet& P, int iat, bool safe_t
{
// this should depend on the type of update, ratio / ratioGrad
// for now is incorrect fot ratio(P,iat,dG,dL) updates
BackFlowStopper("Fast MSD+BF: acceptMove\n");
BackFlowStopper("Fast MSD+BF: acceptMove");
ScopedTimer local_timer(AccRejTimer);
// update psiCurrent,myG_temp,myL_temp
@ -423,7 +779,7 @@ void MultiSlaterDeterminantFast::acceptMove(ParticleSet& P, int iat, bool safe_t
void MultiSlaterDeterminantFast::restore(int iat)
{
BackFlowStopper("Fast MSD+BF: restore\n");
BackFlowStopper("Fast MSD+BF: restore");
ScopedTimer local_timer(AccRejTimer);
Dets[getDetID(iat)]->restore(iat);
@ -432,7 +788,7 @@ void MultiSlaterDeterminantFast::restore(int iat)
void MultiSlaterDeterminantFast::registerData(ParticleSet& P, WFBufferType& buf)
{
BackFlowStopper("Fast MSD+BF: restore\n");
BackFlowStopper("Fast MSD+BF: restore");
for (size_t id = 0; id < Dets.size(); id++)
Dets[id]->registerData(P, buf);
@ -463,7 +819,7 @@ WaveFunctionComponent::LogValueType MultiSlaterDeterminantFast::updateBuffer(Par
void MultiSlaterDeterminantFast::copyFromBuffer(ParticleSet& P, WFBufferType& buf)
{
BackFlowStopper("Fast MSD+BF: copyFromBuffer\n");
BackFlowStopper("Fast MSD+BF: copyFromBuffer");
for (size_t id = 0; id < Dets.size(); id++)
Dets[id]->copyFromBuffer(P, buf);
@ -803,6 +1159,7 @@ void MultiSlaterDeterminantFast::registerTimers()
{
RatioTimer.reset();
EvalGradTimer.reset();
MWEvalGradTimer.reset();
RatioGradTimer.reset();
PrepareGroupTimer.reset();
UpdateTimer.reset();
@ -843,6 +1200,9 @@ void MultiSlaterDeterminantFast::precomputeC_otherDs(const ParticleSet& P, int i
product *= Dets[id]->detValues[(*C2node)[id][i]];
C_otherDs[ig][(*C2node)[ig][i]] += product;
}
//put C_otherDs in host
auto* C_otherDs_ptr = C_otherDs[ig].data();
PRAGMA_OFFLOAD("omp target update to(C_otherDs_ptr[:Dets[ig]->getNumDets()])") //transfer content to device
}

View File

@ -20,6 +20,8 @@
#include "QMCWaveFunctions/Fermion/MultiDiracDeterminant.h"
#include "Utilities/TimerManager.h"
#include "QMCWaveFunctions/Fermion/BackflowTransformation.h"
#include "Platforms/PinnedAllocator.h"
#include "OMPTarget/OMPallocator.hpp"
namespace qmcplusplus
{
@ -51,8 +53,12 @@ class MultiSlaterDeterminantFast : public WaveFunctionComponent
{
public:
void registerTimers();
NewTimer &RatioTimer, &EvalGradTimer, &RatioGradTimer, &PrepareGroupTimer, &UpdateTimer;
NewTimer &AccRejTimer, &EvaluateTimer;
NewTimer &RatioTimer, &MWRatioTimer, &OffloadRatioTimer, &OffloadGradTimer;
NewTimer &EvalGradTimer, &MWEvalGradTimer, &RatioGradTimer, &MWRatioGradTimer;
NewTimer &PrepareGroupTimer, &UpdateTimer, &AccRejTimer, &EvaluateTimer;
template<typename DT>
using OffloadPinnedAllocator = OMPallocator<DT, PinnedAlignedAllocator<DT>>;
typedef SPOSet* SPOSetPtr;
typedef OrbitalSetTraits<ValueType>::IndexVector_t IndexVector_t;
@ -83,7 +89,7 @@ public:
//builds orbital rotation parameters using MultiSlater member variables
void buildOptVariables();
void BackFlowStopper(const std::string& func_name)
void BackFlowStopper(const std::string& func_name) const
{
if (usingBF)
throw std::runtime_error(func_name + " not implemented!\n");
@ -110,13 +116,29 @@ public:
void prepareGroup(ParticleSet& P, int ig) override;
GradType evalGrad(ParticleSet& P, int iat) override;
PsiValueType evalGrad_impl(ParticleSet& P, int iat, bool newpos, GradType& g_at);
PsiValueType evalGrad_impl_no_precompute(ParticleSet& P, int iat, bool newpos, GradType& g_at);
//evalGrad, but returns the spin gradient as well
GradType evalGradWithSpin(ParticleSet& P, int iat, ComplexType& spingrad) override;
void mw_evalGrad(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
std::vector<GradType>& grad_now) const override;
void mw_ratioGrad(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
std::vector<PsiValueType>& ratios,
std::vector<GradType>& grad_new) const override;
void mw_calcRatio(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
std::vector<PsiValueType>& ratios) const override;
PsiValueType ratio(ParticleSet& P, int iat) override;
PsiValueType ratioGrad(ParticleSet& P, int iat, GradType& grad_iat) override;
PsiValueType ratio_impl(ParticleSet& P, int iat);
PsiValueType ratio_impl_no_precompute(ParticleSet& P, int iat);
//ratioGradWithSpin, but includes tthe spin gradient info
PsiValueType ratioGradWithSpin(ParticleSet& P, int iat, GradType& grad_iat, ComplexType& spingrad_iat) override;
void evaluateRatios(const VirtualParticleSet& VP, std::vector<ValueType>& ratios) override;
@ -166,7 +188,10 @@ public:
/// CI coefficients
std::shared_ptr<std::vector<ValueType>> C;
/// C_n x D^1_n x D^2_n ... D^3_n with one D removed. Summed by group. [spin, unique det id]
std::vector<std::vector<ValueType>> C_otherDs;
std::vector<Vector<ValueType, OffloadPinnedAllocator<ValueType>>> C_otherDs;
/// a collection of device pointers of multiple walkers fused for fast H2D transfer.
Vector<const ValueType*, OffloadPinnedAllocator<const ValueType*>> C_otherDs_ptr_list;
Vector<const ValueType*, OffloadPinnedAllocator<const ValueType*>> det_value_ptr_list;
ParticleSet::ParticleGradient_t myG, myG_temp;
ParticleSet::ParticleLaplacian_t myL, myL_temp;
@ -200,6 +225,37 @@ private:
return id;
}
/** an implementation shared by evalGrad and ratioGrad. Use precomputed data
* @param newpos to distinguish evalGrad(false) ratioGrad(true)
*/
PsiValueType evalGrad_impl(ParticleSet& P, int iat, bool newpos, GradType& g_at);
/// multi walker version of evalGrad_impl
static void mw_evalGrad_impl(const RefVectorWithLeader<WaveFunctionComponent>& WFC_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
bool newpos,
std::vector<GradType>& grad_now,
std::vector<PsiValueType>& psi_list);
/** an implementation shared by evalGrad and ratioGrad. No use of precomputed data
* @param newpos to distinguish evalGrad(false) ratioGrad(true)
*/
PsiValueType evalGrad_impl_no_precompute(ParticleSet& P, int iat, bool newpos, GradType& g_at);
//implemtation for evalGradWithSpin
PsiValueType evalGradWithSpin_impl(ParticleSet& P, int iat, bool newpos, GradType& g_at, ComplexType& sg_at);
//implemtation for evalGradWithSpin with no precomputation
PsiValueType evalGradWithSpin_impl_no_precompute(ParticleSet& P,
int iat,
bool newpos,
GradType& g_at,
ComplexType& sg_at);
// an implementation of ratio. Use precomputed data
PsiValueType ratio_impl(ParticleSet& P, int iat);
// an implementation of ratio. No use of precomputed data
PsiValueType ratio_impl_no_precompute(ParticleSet& P, int iat);
/** precompute C_otherDs for a given particle group
* @param P a particle set
* @param ig group id

View File

@ -245,11 +245,12 @@ WaveFunctionComponent* SlaterDetBuilder::buildComponent(xmlNodePtr cur)
APP_ABORT("Backflow is not implemented with the table method.");
}
bool spinor = targetPtcl.is_spinor_;
std::vector<std::unique_ptr<MultiDiracDeterminant>> dets;
for (int grp = 0; grp < nGroups; grp++)
{
app_log() << " Creating base determinant (" << grp << ") for MSD expansion. \n";
dets.emplace_back(std::make_unique<MultiDiracDeterminant>(std::move(spo_clones[grp]), grp));
dets.emplace_back(std::make_unique<MultiDiracDeterminant>(std::move(spo_clones[grp]), spinor));
}
if (msd_algorithm == "precomputed_table_method")

View File

@ -112,6 +112,17 @@ void SPOSet::evaluateThirdDeriv(const ParticleSet& P, int first, int last, GGGMa
APP_ABORT("Need specialization of SPOSet::evaluateThirdDeriv(). \n");
}
void SPOSet::evaluate_notranspose_spin(const ParticleSet& P,
int first,
int last,
ValueMatrix_t& logdet,
GradMatrix_t& dlogdet,
ValueMatrix_t& d2logdet,
ValueMatrix_t& dspinlogdet)
{
APP_ABORT("Need specialization of " + className + "::evaluate_notranspose_spin(P,iat,psi,dpsi,d2logdet, dspin_logdet) (vector quantities)\n");
}
void SPOSet::mw_evaluate_notranspose(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int first,

View File

@ -30,7 +30,6 @@
namespace qmcplusplus
{
class ResourceCollection;
/** base class for Single-particle orbital sets
@ -246,8 +245,7 @@ public:
ValueVector_t& psi,
GradVector_t& dpsi,
ValueVector_t& d2psi,
ValueVector_t& dspin
);
ValueVector_t& dspin);
/** evaluate the values, gradients and laplacians of this single-particle orbital sets of multiple walkers
* @param spo_list the list of SPOSet pointers in a walker batch
@ -339,6 +337,26 @@ public:
GradMatrix_t& dlogdet,
ValueMatrix_t& d2logdet) = 0;
/** evaluate the values, gradients and laplacians of this single-particle orbital for [first,last) particles, including the spin gradient
* @param P current ParticleSet
* @param first starting index of the particles
* @param last ending index of the particles
* @param logdet determinant matrix to be inverted
* @param dlogdet gradients
* @param d2logdet laplacians
* @param dspinlogdet, spin gradients
*
* default implementation will abort for all SPOSets except SpinorSet
*
*/
virtual void evaluate_notranspose_spin(const ParticleSet& P,
int first,
int last,
ValueMatrix_t& logdet,
GradMatrix_t& dlogdet,
ValueMatrix_t& d2logdet,
ValueMatrix_t& dspinlogdet);
virtual void mw_evaluate_notranspose(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int first,

View File

@ -22,7 +22,7 @@ void SpinorSet::set_spos(std::unique_ptr<SPOSet>&& up, std::unique_ptr<SPOSet>&&
IndexType spo_size_down = dn->getOrbitalSetSize();
if (spo_size_up != spo_size_down)
APP_ABORT("SpinorSet::set_spos(...): up and down SPO components have different sizes.");
throw std::runtime_error("SpinorSet::set_spos(...): up and down SPO components have different sizes.");
setOrbitalSetSize(spo_size_up);
@ -39,6 +39,14 @@ void SpinorSet::set_spos(std::unique_ptr<SPOSet>&& up, std::unique_ptr<SPOSet>&&
d2psi_work_down.resize(OrbitalSetSize);
}
int SpinorSet::getBasisSetSize() const
{
IndexType basis_up = spo_up->getBasisSetSize();
IndexType basis_dn = spo_dn->getBasisSetSize();
assert(basis_up == basis_dn);
return basis_up;
}
void SpinorSet::resetParameters(const opt_variables_type& optVariables){};
void SpinorSet::setOrbitalSetSize(int norbs) { OrbitalSetSize = norbs; };
@ -170,6 +178,52 @@ void SpinorSet::evaluate_notranspose(const ParticleSet& P,
}
}
void SpinorSet::evaluate_notranspose_spin(const ParticleSet& P,
int first,
int last,
ValueMatrix_t& logdet,
GradMatrix_t& dlogdet,
ValueMatrix_t& d2logdet,
ValueMatrix_t& dspinlogdet)
{
IndexType nelec = P.getTotalNum();
logpsi_work_up.resize(nelec, OrbitalSetSize);
logpsi_work_down.resize(nelec, OrbitalSetSize);
dlogpsi_work_up.resize(nelec, OrbitalSetSize);
dlogpsi_work_down.resize(nelec, OrbitalSetSize);
d2logpsi_work_up.resize(nelec, OrbitalSetSize);
d2logpsi_work_down.resize(nelec, OrbitalSetSize);
spo_up->evaluate_notranspose(P, first, last, logpsi_work_up, dlogpsi_work_up, d2logpsi_work_up);
spo_dn->evaluate_notranspose(P, first, last, logpsi_work_down, dlogpsi_work_down, d2logpsi_work_down);
for (int iat = 0; iat < nelec; iat++)
{
ParticleSet::Scalar_t s = P.activeSpin(iat);
RealType coss(0.0), sins(0.0);
coss = std::cos(s);
sins = std::sin(s);
ValueType eis(coss, sins);
ValueType emis(coss, -sins);
ValueType eye(0, 1.0);
for (int no = 0; no < OrbitalSetSize; no++)
{
logdet(iat, no) = eis * logpsi_work_up(iat, no) + emis * logpsi_work_down(iat, no);
dlogdet(iat, no) = eis * dlogpsi_work_up(iat, no) + emis * dlogpsi_work_down(iat, no);
d2logdet(iat, no) = eis * d2logpsi_work_up(iat, no) + emis * d2logpsi_work_down(iat, no);
dspinlogdet(iat, no) = eye * (eis * logpsi_work_up(iat, no) - emis * logpsi_work_down(iat, no));
}
}
}
void SpinorSet::evaluate_spin(const ParticleSet& P, int iat, ValueVector_t& psi, ValueVector_t& dpsi)
{

View File

@ -40,6 +40,9 @@ public:
*/
void setOrbitalSetSize(int norbs) override;
//gets the BasisSetSize from the underlying SPOSet that make up the spinor
int getBasisSetSize() const override;
/** evaluate the values of this spinor set
* @param P current ParticleSet
@ -91,6 +94,14 @@ public:
ValueMatrix_t& logdet,
GradMatrix_t& dlogdet,
ValueMatrix_t& d2logdet) override;
void evaluate_notranspose_spin(const ParticleSet& P,
int first,
int last,
ValueMatrix_t& logdet,
GradMatrix_t& dlogdet,
ValueMatrix_t& d2logdet,
ValueMatrix_t& dspinlogdet) override;
/** Evaluate the values, spin gradients, and spin laplacians of single particle spinors corresponding to electron iat.
* @param P current particle set.
* @param iat electron index.

Binary file not shown.

View File

@ -23,6 +23,7 @@ SET(UTEST_HDF_INPUT4 ${qmcpack_SOURCE_DIR}/tests/solids/monoO_noncollinear_1x1x1
SET(UTEST_HDF_INPUT5 ${qmcpack_SOURCE_DIR}/tests/solids/diamondC_1x1x1-Gaussian_pp_Tw_cplx/C_diamond-twist-third.h5)
SET(UTEST_HDF_INPUT6 ${qmcpack_SOURCE_DIR}/src/QMCWaveFunctions/tests/lcao_spinor.h5)
SET(UTEST_HDF_INPUT7 ${qmcpack_SOURCE_DIR}/tests/molecules/LiH_ae_MSD/LiH.orbs.h5)
SET(UTEST_HDF_INPUT8 ${qmcpack_SOURCE_DIR}/src/QMCWaveFunctions/tests/Bi.orbs.h5)
EXECUTE_PROCESS(COMMAND ${CMAKE_COMMAND} -E make_directory "${UTEST_DIR}")
MAYBE_SYMLINK(${UTEST_HDF_INPUT0} ${UTEST_DIR}/diamondC_1x1x1.pwscf.h5)
MAYBE_SYMLINK(${UTEST_HDF_INPUT1} ${UTEST_DIR}/diamondC_2x1x1.pwscf.h5)
@ -32,6 +33,7 @@ MAYBE_SYMLINK(${UTEST_HDF_INPUT4} ${UTEST_DIR}/o2_45deg_spins.pwscf.h5)
MAYBE_SYMLINK(${UTEST_HDF_INPUT5} ${UTEST_DIR}/C_diamond-twist-third.h5)
MAYBE_SYMLINK(${UTEST_HDF_INPUT6} ${UTEST_DIR}/lcao_spinor.h5)
MAYBE_SYMLINK(${UTEST_HDF_INPUT7} ${UTEST_DIR}/LiH.orbs.h5)
MAYBE_SYMLINK(${UTEST_HDF_INPUT8} ${UTEST_DIR}/Bi.orbs.h5)
SET(FILES_TO_COPY he_sto3g.wfj.xml ne_def2_svp.wfnoj.xml hcn.structure.xml hcn.wfnoj.xml hcn_downdet.cuspInfo.xml hcn_updet.cuspInfo.xml
ethanol.structure.xml ethanol.wfnoj.xml ethanol_updet.cuspInfo.xml ethanol_downdet.cuspInfo.xml C_diamond-twist-third.structure.xml C_diamond-twist-third.wfj.xml cartesian_order.wfnoj.xml dirac_order.wfnoj.xml )
@ -106,4 +108,16 @@ if (ENABLE_CUDA)
ADD_UNIT_TEST(${UTEST_NAME} 1 1 "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}")
endif()
if (ENABLE_CUDA AND BUILD_MICRO_BENCHMARKS)
SET(UTEST_EXE benchmark_diracmatrixcompute)
SET(UTEST_NAME deterministic-unit_${UTEST_EXE})
SET(BENCHMARK_SRC makeRngSpdMatrix.cpp benchmark_DiracMatrixComputeCUDA.cpp)
ADD_EXECUTABLE(${UTEST_EXE} ${BENCHMARK_SRC})
TARGET_LINK_LIBRARIES(${UTEST_EXE} catch_benchmark_main qmcwfs Math::BLAS_LAPACK Math::scalar_vector_functions utilities_for_test)
IF(USE_OBJECT_TARGET)
TARGET_LINK_LIBRARIES(${UTEST_EXE} qmcutil qmcparticle containers platform_omptarget)
ENDIF()
TARGET_INCLUDE_DIRECTORIES(${UTEST_EXE} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
ADD_UNIT_TEST(${UTEST_NAME} 1 1 "${QMCPACK_UNIT_TEST_DIR}/${UTEST_EXE}")
endif()

View File

@ -0,0 +1,293 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
/** \file
* This implements micro benchmarking on DiracMatrixComputeCUDA.hpp
* Currently it also benchmarks the same size matrix's and batch sizes
* using the Legacy DiracMatrix serially.
*/
#include "catch.hpp"
#include <algorithm>
#include "Configuration.h"
#include "OhmmsData/Libxml2Doc.h"
#include "OhmmsPETE/OhmmsMatrix.h"
#include "OhmmsPETE/OhmmsVector.h"
#include "QMCWaveFunctions/WaveFunctionComponent.h"
#include "QMCWaveFunctions/Fermion/DiracMatrixComputeCUDA.hpp"
#include "QMCWaveFunctions/tests/makeRngSpdMatrix.hpp"
#include "Utilities/for_testing/checkMatrix.hpp"
#include "Utilities/for_testing/RandomForTest.h"
#include "Platforms/PinnedAllocator.h"
#include "Platforms/CUDA/CUDALinearAlgebraHandles.h"
// Legacy CPU inversion for temporary testing
#include "QMCWaveFunctions/Fermion/DiracMatrix.h"
namespace qmcplusplus
{
#ifdef ENABLE_OFFLOAD
template<typename T>
using OffloadPinnedAllocator = OMPallocator<T, PinnedAlignedAllocator<T>>;
#elif ENABLE_CUDA
template<typename T>
using OffloadPinnedAllocator = DualAllocator<T, CUDAAllocator<T>, PinnedAlignedAllocator<T>>;
#endif
template<typename T>
using OffloadPinnedMatrix = Matrix<T, OffloadPinnedAllocator<T>>;
template<typename T>
using OffloadPinnedVector = Vector<T, OffloadPinnedAllocator<T>>;
// Mechanism to pretty print benchmark names.
struct DiracComputeBenchmarkParameters;
std::ostream& operator<<(std::ostream& out, const DiracComputeBenchmarkParameters& dcbmp);
struct DiracComputeBenchmarkParameters
{
std::string name;
size_t n;
int batch_size;
std::string str()
{
std::stringstream stream;
stream << *this;
return stream.str();
}
};
std::ostream& operator<<(std::ostream& out, const DiracComputeBenchmarkParameters& dcbmp)
{
out << dcbmp.name << " n=" << dcbmp.n << " batch=" << dcbmp.batch_size;
return out;
}
/** This and other [.benchmark] benchmarks only run if "[benchmark]" is explicitly passed as tag to test.
*/
TEST_CASE("DiracMatrixComputeCUDA_large_determinants_benchmark_legacy_1024_4", "[wavefunction][fermion][.benchmark]")
{
DiracComputeBenchmarkParameters params;
params.name = "Batched CUDA";
params.n = 1024;
params.batch_size = 4;
auto cuda_handles = std::make_unique<CUDALinearAlgebraHandles>();
DiracMatrixComputeCUDA<double> dmcc(cuda_handles->hstream);
std::vector<Matrix<double>> spd_mats(params.batch_size, {params.n, params.n});
std::vector<OffloadPinnedMatrix<double>> pinned_spd_mats(params.batch_size, {params.n, params.n});
for (int im = 0; im < params.batch_size; ++im)
{
testing::makeRngSpdMatrix(spd_mats[im]);
for (int i = 0; i < params.n; ++i)
for (int j = 0; j < params.n; ++j)
pinned_spd_mats[im](i, j) = spd_mats[im](i, j);
}
OffloadPinnedVector<std::complex<double>> log_values(params.batch_size);
std::vector<OffloadPinnedMatrix<double>> pinned_inv_mats(params.batch_size, {params.n, params.n});
auto a_mats = makeRefVector<decltype(pinned_spd_mats)::value_type>(pinned_spd_mats);
RefVector<OffloadPinnedMatrix<double>> inv_a_mats =
makeRefVector<decltype(pinned_inv_mats)::value_type>(pinned_inv_mats);
std::vector<bool> compute_mask(params.batch_size, true);
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] { dmcc.mw_invertTranspose(*cuda_handles, a_mats, inv_a_mats, log_values, compute_mask); });
};
DiracMatrix<double> dmat;
std::vector<Matrix<double>> inv_mats_test(params.batch_size, {params.n, params.n});
;
std::vector<std::complex<double>> log_values_test(params.batch_size);
params.name = "legacy CPU";
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] {
for (int im = 0; im < params.batch_size; ++im)
dmat.invert_transpose(spd_mats[im], inv_mats_test[im], log_values_test[im]);
});
};
}
/** This test will run by default.
*/
TEST_CASE("benchmark_DiracMatrixComputeCUDA_vs_legacy_256_10", "[wavefunction][fermion][benchmark]")
{
DiracComputeBenchmarkParameters params;
params.name = "Batched CUDA";
params.n = 256;
params.batch_size = 10;
auto cuda_handles = std::make_unique<CUDALinearAlgebraHandles>();
DiracMatrixComputeCUDA<double> dmcc(cuda_handles->hstream);
std::vector<Matrix<double>> spd_mats(params.batch_size, {params.n, params.n});
std::vector<OffloadPinnedMatrix<double>> pinned_spd_mats(params.batch_size, {params.n, params.n});
for (int im = 0; im < params.batch_size; ++im)
{
testing::makeRngSpdMatrix(spd_mats[im]);
for (int i = 0; i < params.n; ++i)
for (int j = 0; j < params.n; ++j)
pinned_spd_mats[im](i, j) = spd_mats[im](i, j);
}
OffloadPinnedVector<std::complex<double>> log_values(params.batch_size);
std::vector<OffloadPinnedMatrix<double>> pinned_inv_mats(params.batch_size, {params.n, params.n});
auto a_mats = makeRefVector<decltype(pinned_spd_mats)::value_type>(pinned_spd_mats);
RefVector<OffloadPinnedMatrix<double>> inv_a_mats =
makeRefVector<decltype(pinned_inv_mats)::value_type>(pinned_inv_mats);
std::vector<bool> compute_mask(params.batch_size, true);
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] { dmcc.mw_invertTranspose(*cuda_handles, a_mats, inv_a_mats, log_values, compute_mask); });
};
DiracMatrix<double> dmat;
std::vector<Matrix<double>> inv_mats_test(params.batch_size, {params.n, params.n});
;
std::vector<std::complex<double>> log_values_test(params.batch_size);
params.name = "legacy CPU";
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] {
for (int im = 0; im < params.batch_size; ++im)
dmat.invert_transpose(spd_mats[im], inv_mats_test[im], log_values_test[im]);
});
};
}
/** Only runs if [benchmark] tag is passed.
*/
TEST_CASE("benchmark_DiracMatrixComputeCUDASingle_vs_legacy_256_10", "[wavefunction][fermion][.benchmark]")
{
DiracComputeBenchmarkParameters params;
params.name = "Forced Serial Batched CUDA";
params.n = 256;
params.batch_size = 10;
auto cuda_handles = std::make_unique<CUDALinearAlgebraHandles>();
DiracMatrixComputeCUDA<double> dmcc(cuda_handles->hstream);
std::vector<Matrix<double>> spd_mats(params.batch_size, {params.n, params.n});
std::vector<OffloadPinnedMatrix<double>> pinned_spd_mats(params.batch_size, {params.n, params.n});
for (int im = 0; im < params.batch_size; ++im)
{
testing::makeRngSpdMatrix(spd_mats[im]);
for (int i = 0; i < params.n; ++i)
for (int j = 0; j < params.n; ++j)
pinned_spd_mats[im](i, j) = spd_mats[im](i, j);
}
std::vector<OffloadPinnedMatrix<double>> pinned_inv_mats(params.batch_size, {params.n, params.n});
std::vector<OffloadPinnedVector<std::complex<double>>> log_values(params.batch_size);
for (auto& log_value : log_values)
log_value.resize(1, {0, 0});
auto a_mats = makeRefVector<decltype(pinned_spd_mats)::value_type>(pinned_spd_mats);
RefVector<OffloadPinnedMatrix<double>> inv_a_mats =
makeRefVector<decltype(pinned_inv_mats)::value_type>(pinned_inv_mats);
std::vector<bool> compute_mask(params.batch_size, true);
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] {
for (int im = 0; im < params.batch_size; ++im)
dmcc.invert_transpose(*cuda_handles, pinned_spd_mats[im], pinned_inv_mats[im], log_values[im]);
});
};
DiracMatrix<double> dmat;
std::vector<Matrix<double>> inv_mats_test(params.batch_size, {params.n, params.n});
;
std::vector<std::complex<double>> log_values_test(params.batch_size);
params.name = "legacy CPU";
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] {
for (int im = 0; im < params.batch_size; ++im)
dmat.invert_transpose(spd_mats[im], inv_mats_test[im], log_values_test[im]);
});
};
}
/** Only runs if [benchmark] tag is passed.
*/
TEST_CASE("benchmark_DiracMatrixComputeCUDASingle_vs_legacy_1024_4", "[wavefunction][fermion][.benchmark]")
{
DiracComputeBenchmarkParameters params;
params.name = "Forced Serial Batched CUDA";
params.n = 1024;
params.batch_size = 4;
auto cuda_handles = std::make_unique<CUDALinearAlgebraHandles>();
DiracMatrixComputeCUDA<double> dmcc(cuda_handles->hstream);
std::vector<Matrix<double>> spd_mats(params.batch_size, {params.n, params.n});
std::vector<OffloadPinnedMatrix<double>> pinned_spd_mats(params.batch_size, {params.n, params.n});
for (int im = 0; im < params.batch_size; ++im)
{
testing::makeRngSpdMatrix(spd_mats[im]);
for (int i = 0; i < params.n; ++i)
for (int j = 0; j < params.n; ++j)
pinned_spd_mats[im](i, j) = spd_mats[im](i, j);
}
std::vector<OffloadPinnedMatrix<double>> pinned_inv_mats(params.batch_size, {params.n, params.n});
std::vector<OffloadPinnedVector<std::complex<double>>> log_values(params.batch_size);
for (auto& log_value : log_values)
log_value.resize(1, {0, 0});
auto a_mats = makeRefVector<decltype(pinned_spd_mats)::value_type>(pinned_spd_mats);
RefVector<OffloadPinnedMatrix<double>> inv_a_mats =
makeRefVector<decltype(pinned_inv_mats)::value_type>(pinned_inv_mats);
std::vector<bool> compute_mask(params.batch_size, true);
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] {
for (int im = 0; im < params.batch_size; ++im)
dmcc.invert_transpose(*cuda_handles, pinned_spd_mats[im], pinned_inv_mats[im], log_values[im]);
});
};
DiracMatrix<double> dmat;
std::vector<Matrix<double>> inv_mats_test(params.batch_size, {params.n, params.n});
;
std::vector<std::complex<double>> log_values_test(params.batch_size);
params.name = "legacy CPU";
BENCHMARK_ADVANCED(params.str())(Catch::Benchmark::Chronometer meter)
{
meter.measure([&] {
for (int im = 0; im < params.batch_size; ++im)
dmat.invert_transpose(spd_mats[im], inv_mats_test[im], log_values_test[im]);
});
};
}
} // namespace qmcplusplus

View File

@ -30,7 +30,8 @@ namespace qmcplusplus
using RealType = QMCTraits::RealType;
using ValueType = QMCTraits::ValueType;
using LogValueType = std::complex<QMCTraits::QTFull::RealType>;
using LogComplexApprox = Catch::Detail::LogComplexApprox;
TEST_CASE("DiracMatrix_identity", "[wavefunction][fermion]")
{
DiracMatrix<ValueType> dm;
@ -53,7 +54,7 @@ TEST_CASE("DiracMatrix_identity", "[wavefunction][fermion]")
eye(1, 1) = 1.0;
eye(2, 2) = 1.0;
auto check_matrix_result = checkMatrix(m_invT, eye);
CheckMatrixResult check_matrix_result = checkMatrix(m_invT, eye);
CHECKED_ELSE(check_matrix_result.result) { FAIL(check_matrix_result.result_message); }
}

View File

@ -27,13 +27,18 @@ using std::string;
namespace qmcplusplus
{
using PosType = ParticleSet::PosType;
using RealType = ParticleSet::RealType;
using ValueType = ParticleSet::ValueType;
using GradType = ParticleSet::GradType;
using LogValueType = WaveFunctionComponent::LogValueType;
using PsiValueType = WaveFunctionComponent::PsiValueType;
void test_LiH_msd(const std::string& spo_xml_string,
const std::string& check_sponame,
int check_spo_size,
int check_basisset_size,
int test_batched)
int test_nlpp_algorithm_batched,
int test_batched_api)
{
Communicate* c;
c = OHMMS::Controller;
@ -142,7 +147,7 @@ void test_LiH_msd(const std::string& spo_xml_string,
CHECK(dhpsioverpsi[1] == ValueApprox(0.009316665149067847));
CHECK(dhpsioverpsi[nparam - 1] == ValueApprox(0.982665984797896));
if (test_batched)
if (test_nlpp_algorithm_batched)
{
// set virtutal particle position
PosType newpos(0.3, 0.2, 0.5);
@ -181,9 +186,70 @@ void test_LiH_msd(const std::string& spo_xml_string,
CHECK(std::real(ratio_1) == Approx(-0.8544310407));
CHECK(twf.getLogPsi() == Approx(-7.6460278462));
}
// testing batched interfaces
if (test_batched_api)
{
ParticleSet elec_clone(elec_);
elec_clone.update();
std::unique_ptr<TrialWaveFunction> twf_clone(twf.makeClone(elec_clone));
std::vector<PosType> displ(2);
displ[0] = {0.1, 0.2, 0.3};
displ[1] = {-0.2, -0.3, 0.0};
RefVectorWithLeader<ParticleSet> p_ref_list(elec_, {elec_, elec_clone});
RefVectorWithLeader<TrialWaveFunction> wf_ref_list(twf, {twf, *twf_clone});
ParticleSet::mw_update(p_ref_list);
TrialWaveFunction::mw_evaluateLog(wf_ref_list, p_ref_list);
std::cout << "before YYY [0] getLogPsi getPhase " << std::setprecision(16) << wf_ref_list[0].getLogPsi() << " "
<< wf_ref_list[0].getPhase() << std::endl;
std::cout << "before YYY [1] getLogPsi getPhase " << std::setprecision(16) << wf_ref_list[1].getLogPsi() << " "
<< wf_ref_list[1].getPhase() << std::endl;
CHECK(std::complex<RealType>(wf_ref_list[0].getLogPsi(), wf_ref_list[0].getPhase()) ==
LogComplexApprox(std::complex<RealType>(-7.803347327300153, 0.0)));
CHECK(std::complex<RealType>(wf_ref_list[1].getLogPsi(), wf_ref_list[1].getPhase()) ==
LogComplexApprox(std::complex<RealType>(-7.803347327300153, 0.0)));
std::vector<GradType> grad_old(2);
const int moved_elec_id = 1;
TrialWaveFunction::mw_evalGrad(wf_ref_list, p_ref_list, moved_elec_id, grad_old);
CHECK(grad_old[0][0] == ValueApprox(-2.6785305398));
CHECK(grad_old[0][1] == ValueApprox(-1.7953759996));
CHECK(grad_old[0][2] == ValueApprox(-5.8209379274));
CHECK(grad_old[1][0] == ValueApprox(-2.6785305398));
CHECK(grad_old[1][1] == ValueApprox(-1.7953759996));
CHECK(grad_old[1][2] == ValueApprox(-5.8209379274));
std::vector<GradType> grad_new(2);
std::vector<PsiValueType> ratios(2);
ParticleSet::mw_makeMove(p_ref_list, moved_elec_id, displ);
TrialWaveFunction::mw_calcRatio(wf_ref_list, p_ref_list, moved_elec_id, ratios);
CHECK(ratios[0] == ValueApprox(PsiValueType(-0.6181619459)));
CHECK(ratios[1] == ValueApprox(PsiValueType(1.6186330488)));
TrialWaveFunction::mw_calcRatioGrad(wf_ref_list, p_ref_list, moved_elec_id, ratios, grad_new);
CHECK(ratios[0] == ValueApprox(PsiValueType(-0.6181619459)));
CHECK(ratios[1] == ValueApprox(PsiValueType(1.6186330488)));
CHECK(grad_new[0][0] == ValueApprox(1.2418467899));
CHECK(grad_new[0][1] == ValueApprox(1.2425653495));
CHECK(grad_new[0][2] == ValueApprox(4.4273237873));
CHECK(grad_new[1][0] == ValueApprox(-0.8633778143));
CHECK(grad_new[1][1] == ValueApprox(0.8245347691));
CHECK(grad_new[1][2] == ValueApprox(-5.1513380151));
}
}
TEST_CASE("LiH multi Slater dets", "[wavefunction]")
TEST_CASE("LiH multi Slater dets table_method", "[wavefunction]")
{
app_log() << "-----------------------------------------------------------------" << std::endl;
app_log() << "LiH_msd using the table method no precomputation" << std::endl;
@ -209,8 +275,11 @@ TEST_CASE("LiH multi Slater dets", "[wavefunction]")
</determinantset> \
</wavefunction> \
";
test_LiH_msd(spo_xml_string1, "spo-up", 85, 105, true);
test_LiH_msd(spo_xml_string1, "spo-up", 85, 105, true, true);
}
TEST_CASE("LiH multi Slater dets all_determinants", "[wavefunction]")
{
app_log() << "-----------------------------------------------------------------" << std::endl;
app_log() << "LiH_msd using the traditional slow method with all the determinants" << std::endl;
app_log() << "-----------------------------------------------------------------" << std::endl;
@ -235,8 +304,14 @@ TEST_CASE("LiH multi Slater dets", "[wavefunction]")
</determinantset> \
</wavefunction> \
";
test_LiH_msd(spo_xml_string1_slow, "spo-up", 85, 105, false);
/* NOTE: test_batched_api is set false because of unexpected failures.
all_determinants should pass all the existing tests. There are likely bugs.
*/
test_LiH_msd(spo_xml_string1_slow, "spo-up", 85, 105, false, false);
}
TEST_CASE("LiH multi Slater dets precomputed_table_method", "[wavefunction]")
{
app_log() << "-----------------------------------------------------------------" << std::endl;
app_log() << "LiH_msd using the table method with new optimization" << std::endl;
app_log() << "-----------------------------------------------------------------" << std::endl;
@ -261,6 +336,158 @@ TEST_CASE("LiH multi Slater dets", "[wavefunction]")
</determinantset> \
</wavefunction> \
";
test_LiH_msd(spo_xml_string1_new, "spo-up", 85, 105, true);
test_LiH_msd(spo_xml_string1_new, "spo-up", 85, 105, true, true);
}
#ifdef QMC_COMPLEX
void test_Bi_msd(const std::string& spo_xml_string,
const std::string& check_sponame,
int check_spo_size,
int check_basisset_size)
{
Communicate* c;
c = OHMMS::Controller;
auto ions_uptr = std::make_unique<ParticleSet>();
auto elec_uptr = std::make_unique<ParticleSet>();
ParticleSet& ions_(*ions_uptr);
ParticleSet& elec_(*elec_uptr);
ions_.setName("ion0");
ions_.create({1});
ions_.R[0] = {0.0, 0.0, 0.0};
SpeciesSet& ispecies = ions_.getSpeciesSet();
int LiIdx = ispecies.addSpecies("Bi");
elec_.setName("elec");
elec_.create({5});
elec_.R[0] = {1.592992772, -2.241313928, -0.7315193518};
elec_.R[1] = {0.07621077199, 0.8497557547, 1.604678718};
elec_.R[2] = {2.077473445, 0.680621113, -0.5251243321};
elec_.R[3] = {-1.488849594, 0.7470552741, 0.6659555498};
elec_.R[4] = {-1.448485879, 0.7337274141, 0.02687190951};
elec_.spins[0] = 4.882003828;
elec_.spins[1] = 0.06469299507;
elec_.spins[2] = 5.392168887;
elec_.spins[3] = 5.33941214;
elec_.spins[4] = 3.127416326;
elec_.is_spinor_ = true;
SpeciesSet& tspecies = elec_.getSpeciesSet();
int upIdx = tspecies.addSpecies("u");
int massIdx = tspecies.addAttribute("mass");
tspecies(massIdx, upIdx) = 1.0;
// Necessary to set mass
elec_.resetGroups();
// Need 1 electron and 1 proton, somehow
//ParticleSet target = ParticleSet();
ParticleSetPool ptcl = ParticleSetPool(c);
ptcl.addParticleSet(std::move(elec_uptr));
ptcl.addParticleSet(std::move(ions_uptr));
Libxml2Document doc;
bool okay = doc.parseFromString(spo_xml_string);
REQUIRE(okay);
xmlNodePtr ein_xml = doc.getRoot();
WaveFunctionFactory wf_factory("psi0", elec_, ptcl.getPool(), c);
wf_factory.put(ein_xml);
SPOSet* spo_ptr(wf_factory.getSPOSet(check_sponame));
REQUIRE(spo_ptr != nullptr);
CHECK(spo_ptr->getOrbitalSetSize() == check_spo_size);
CHECK(spo_ptr->getBasisSetSize() == check_basisset_size);
ions_.update();
elec_.update();
auto& twf(*wf_factory.getTWF());
twf.setMassTerm(elec_);
twf.evaluateLog(elec_);
//Reference values from QWalk with SOC
std::cout << "twf.evaluateLog logpsi " << std::setprecision(16) << twf.getLogPsi() << " " << twf.getPhase()
<< std::endl;
CHECK(std::complex<double>(twf.getLogPsi(), twf.getPhase()) ==
LogComplexApprox(std::complex<double>(-10.0084091, 1.153302116)));
twf.prepareGroup(elec_, 0);
ParticleSet::ComplexType spingrad_old;
auto grad_old = twf.evalGradWithSpin(elec_, 1, spingrad_old);
std::cout << "twf.evalGrad grad_old " << std::setprecision(16) << grad_old << std::endl;
CHECK(grad_old[0] == ComplexApprox(ValueType(0.2037139, -0.0468526)).epsilon(1e-4));
CHECK(grad_old[1] == ComplexApprox(ValueType(-0.2452648, 0.0711994)).epsilon(1e-4));
CHECK(grad_old[2] == ComplexApprox(ValueType(0.0371131, 0.0239808)).epsilon(1e-4));
PosType delta(0.464586, 0.75017, 1.184383);
double ds = 0.12;
elec_.makeMoveWithSpin(0, delta, ds);
ParticleSet::GradType grad_new;
ParticleSet::ComplexType spingrad_new;
auto ratio = twf.calcRatioGradWithSpin(elec_, 0, grad_new, spingrad_new);
std::cout << "twf.calcRatioGrad ratio " << ratio << " grad_new " << grad_new << std::endl;
CHECK(ValueType(std::abs(ratio)) == ValueApprox(0.650438041).epsilon(1e-4));
CHECK(grad_new[0] == ComplexApprox(ValueType(-0.947982, -0.1390323)).epsilon(1e-4));
CHECK(grad_new[1] == ComplexApprox(ValueType(-0.428998, -0.3268736)).epsilon(1e-4));
CHECK(grad_new[2] == ComplexApprox(ValueType(-0.587935, -0.2278003)).epsilon(1e-4));
ratio = twf.calcRatio(elec_, 0);
std::cout << "twf.calcRatio ratio " << ratio << std::endl;
CHECK(ValueType(std::abs(ratio)) == ValueApprox(0.650438041).epsilon(1e-4));
}
TEST_CASE("Bi-spinor multi Slater dets", "[wavefunction]")
{
app_log() << "-----------------------------------------------------------------" << std::endl;
app_log() << "Bi using the table method no precomputation" << std::endl;
app_log() << "-----------------------------------------------------------------" << std::endl;
const char* spo_xml_string1 = "<wavefunction name=\"psi0\" target=\"e\"> \
<sposet_builder name=\"spinorbuilder\" type=\"molecularorbital\" source=\"ion0\" transform=\"yes\" href=\"Bi.orbs.h5\" precision=\"double\"> \
<sposet name=\"myspo\" size=\"8\"> \
<occupation mode=\"ground\"/> \
</sposet> \
</sposet_builder> \
<determinantset> \
<multideterminant optimize=\"no\" spo_0=\"myspo\" algorithm=\"table_method\"> \
<detlist size=\"5\" type=\"DETS\" nc0=\"0\" ne0=\"5\" nstates=\"8\" cutoff=\"1e-20\"> \
<ci coeff=\"-0.8586\" occ0=\"11111000\"/> \
<ci coeff=\"-0.2040\" occ0=\"11101100\"/> \
<ci coeff=\"0.2040\" occ0=\"11100011\"/> \
<ci coeff=\"0.2340\" occ0=\"11001011\"/> \
<ci coeff=\"0.3534\" occ0=\"11010101\"/> \
</detlist> \
</multideterminant> \
</determinantset> \
</wavefunction>";
test_Bi_msd(spo_xml_string1, "myspo", 8, 123);
app_log() << "-----------------------------------------------------------------" << std::endl;
app_log() << "Bi using the table method with new optimization" << std::endl;
app_log() << "-----------------------------------------------------------------" << std::endl;
const char* spo_xml_string1_new = "<wavefunction name=\"psi0\" target=\"e\"> \
<sposet_builder name=\"spinorbuilder\" type=\"molecularorbital\" source=\"ion0\" transform=\"yes\" href=\"Bi.orbs.h5\" precision=\"double\"> \
<sposet name=\"myspo\" size=\"8\"> \
<occupation mode=\"ground\"/> \
</sposet> \
</sposet_builder> \
<determinantset> \
<multideterminant optimize=\"no\" spo_0=\"myspo\" algorithm=\"precomputed_table_method\"> \
<detlist size=\"5\" type=\"DETS\" nc0=\"0\" ne0=\"5\" nstates=\"8\" cutoff=\"1e-20\"> \
<ci coeff=\"-0.8586\" occ0=\"11111000\"/> \
<ci coeff=\"-0.2040\" occ0=\"11101100\"/> \
<ci coeff=\"0.2040\" occ0=\"11100011\"/> \
<ci coeff=\"0.2340\" occ0=\"11001011\"/> \
<ci coeff=\"0.3534\" occ0=\"11010101\"/> \
</detlist> \
</multideterminant> \
</determinantset> \
</wavefunction>";
test_Bi_msd(spo_xml_string1_new, "myspo", 8, 123);
}
#endif
} // namespace qmcplusplus

View File

@ -13,6 +13,3 @@ SET(test_utilities_src
add_library(utilities_for_test ${test_utilities_src})
target_include_directories(utilities_for_test PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
target_include_directories(utilities_for_test PRIVATE "${PROJECT_SOURCE_DIR}/external_codes/catch")
add_library(catch_main_no_mpi catch_main_no_mpi.cpp)
TARGET_INCLUDE_DIRECTORIES(catch_main_no_mpi PUBLIC "${PROJECT_SOURCE_DIR}/external_codes/catch")

View File

@ -1,38 +0,0 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2021 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File created by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//////////////////////////////////////////////////////////////////////////////////////
#define CATCH_CONFIG_RUNNER
#include "catch.hpp"
// Replacement unit test main function to ensure that MPI is finalized once
// (and only once) at the end of the unit test.
int main(int argc, char* argv[])
{
Catch::Session session;
using namespace Catch::clara;
// Build command line parser.
auto cli = session.cli();
session.cli(cli);
// Parse arguments.
int parser_err = session.applyCommandLine(argc, argv);
// Run the tests.
int result = session.run(argc, argv);
if (parser_err != 0)
{
return parser_err;
}
else
{
return result;
}
}

View File

@ -82,6 +82,5 @@ extern template bool approxEquality<float>(float val_a, float val_b);
extern template bool approxEquality<std::complex<float>>(std::complex<float> val_a, std::complex<float> val_b);
extern template bool approxEquality<double>(double val_a, double val_b);
extern template bool approxEquality<std::complex<double>>(std::complex<double> val_a, std::complex<double> val_b);
} // namespace qmcplusplus
#endif

View File

@ -0,0 +1,23 @@
<?xml version="1.0"?>
<simulation>
<project id="qmc_short-MD-C4_AE_Mol_Ground_QP_BATCHED" series="2"/>
<random seed="864"/>
<include href="C4_AE_Mol_QP.structure.xml"/>
<include href="C4_AE_Mol_QP_Ground.wfnoj.xml"/>
<hamiltonian name="h0" type="generic" target="e">
<pairpot name="ElecElec" type="coulomb" source="e" target="e" physical="true"/>
<pairpot name="IonIon" type="coulomb" source="ion0" target="ion0"/>
<pairpot name="IonElec" type="coulomb" source="ion0" target="e"/>
</hamiltonian>
<qmc method="vmc_batched" move="pbyp" gpu="yes">
<!--qmc method="vmc" move="pbyp" gpu="yes"-->
<estimator name="LocalEnergy" hdf5="no"/>
<parameter name="walkers_per_rank" > 16 </parameter> <!--BATCH-->
<parameter name="blocks" > 1000 </parameter>
<parameter name="steps" > 8.0 </parameter>
<parameter name="subSteps" > 2 </parameter>
<parameter name="timestep" > 0.3 </parameter>
<parameter name="warmupSteps" > 100 </parameter>
<parameter name="usedrift" > yes </parameter>
</qmc>
</simulation>