Merge branch 'develop' into remove-cuda

This commit is contained in:
Ye Luo 2022-04-28 09:50:15 -05:00 committed by GitHub
commit 7d694b1282
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 157 additions and 89 deletions

View File

@ -316,39 +316,41 @@ Batched ``vmc`` driver (experimental)
parameters:
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
+================================+==============+=========================+=============+===============================================+
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized dwalker crowds |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``substeps`` | integer | :math:`\geq 0` | 1 | Number of substeps per step |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``usedrift`` | text | yes,no | yes | Use the algorithm with drift |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``samples`` (not ready) | integer | :math:`\geq 0` | 0 | Number of walker samples for in this VMC run |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``storeconfigs`` (not ready) | integer | all values | 0 | Write configurations to files |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
+================================+==============+=========================+=============+=================================================+
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``substeps`` | integer | :math:`\geq 0` | 1 | Number of substeps per step |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``usedrift`` | text | yes,no | yes | Use the algorithm with drift |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``samples`` (not ready) | integer | :math:`\geq 0` | 0 | Number of walker samples in this VMC run |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``storeconfigs`` (not ready) | integer | all values | 0 | Write configurations to files |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``crowd_serialize_walkers`` | text | yes, no | no | Force use of single walker APIs (for testing) |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
Additional information:
@ -1566,47 +1568,49 @@ Batched ``dmc`` driver (experimental)
parameters:
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
+================================+==============+=========================+=============+===============================================+
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized dwalker crowds |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``nonlocalmoves`` | string | yes, no, v0, v1, v3 | no | Run with T-moves |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``branching_cutoff_scheme`` | string | classic/DRV/ZSGMA/YL | classic | Branch cutoff scheme |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``feedback`` | double | :math:`\geq 0` | 1.0 | Population feedback on the trial energy |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``sigmaBound`` | 10 | :math:`\geq 0` | 10 | Parameter to cutoff large weights |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``reconfiguration`` | string | yes/pure/other | no | Fixed population technique |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``storeconfigs`` | integer | all values | 0 | Store configurations |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``use_nonblocking`` | string | yes/no | yes | Using nonblocking send/recv |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``debug_disable_branching`` | string | yes/no | no | Disable branching for debugging |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``crowd_serialize_walkers`` | integer | yes, no | no | Force use of single walker APIs (for testing) |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
+================================+==============+=========================+=============+=================================================+
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``nonlocalmoves`` | string | yes, no, v0, v1, v3 | no | Run with T-moves |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``branching_cutoff_scheme`` | string | classic/DRV/ZSGMA/YL | classic | Branch cutoff scheme |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``feedback`` | double | :math:`\geq 0` | 1.0 | Population feedback on the trial energy |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``sigmaBound`` | real | :math:`\geq 0` | 10 | Parameter to cut off large weights |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``reconfiguration`` | string | yes/pure/other | no | Fixed population technique |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``storeconfigs`` | integer | all values | 0 | Store configurations |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``use_nonblocking`` | string | yes/no | yes | Using nonblocking send/recv |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``debug_disable_branching`` | string | yes/no | no | Disable branching for debugging |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``crowd_serialize_walkers`` | text | yes, no | no | Force use of single walker APIs (for testing) |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
| ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block |
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
- ``crowds`` The number of crowds that the walkers are subdivided into on each MPI rank. If not provided, it is set equal to the number of OpenMP threads.

View File

@ -103,7 +103,7 @@ void DMCBatched::advanceWalkers(const StateForThread& sft,
}
const int num_walkers = crowd.size();
auto& pset_leader = walker_elecs.getLeader();
auto& pset_leader = walker_elecs.getLeader();
const int num_particles = pset_leader.getTotalNum();
MCCoords<CT> drifts(num_walkers), drifts_reverse(num_walkers);
@ -428,18 +428,22 @@ bool DMCBatched::run()
ScopedTimer local_timer(timers_.init_walkers_timer);
ParallelExecutor<> section_start_task;
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
}
print_mem("DMCBatched after initialLogEvaluation", app_summary());
{
FullPrecRealType energy, variance;
population_.measureGlobalEnergyVariance(*myComm, energy, variance);
// false indicates we do not support kill at node crossings.
branch_engine_->initParam(population_, energy, variance, dmcdriver_input_.get_reconfiguration(), false);
walker_controller_->setTrialEnergy(branch_engine_->getEtrial());
print_mem("DMCBatched after initialLogEvaluation", app_summary());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("InitialLogEvaluation");
}
// this barrier fences all previous load imbalance. Avoid block 0 timing pollution.
myComm->barrier();
ScopedTimer local_timer(timers_.production_timer);
ParallelExecutor<> crowd_task;
for (int block = 0; block < num_blocks; ++block)
@ -475,6 +479,8 @@ bool DMCBatched::run()
population_.redistributeWalkers(crowds_);
}
print_mem("DMCBatched after a block", app_debug_stream());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Block " + std::to_string(block));
endBlock();
dmc_loop.stop();

View File

@ -36,6 +36,7 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
std::string serialize_walkers;
std::string debug_checks_str;
std::string measure_imbalance_str;
ParameterSet parameter_set;
parameter_set.add(store_config_period_, "storeconfigs");
@ -70,6 +71,7 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
parameter_set.add(max_disp_sq_, "maxDisplSq");
parameter_set.add(debug_checks_str, "debug_checks",
{"no", "all", "checkGL_after_load", "checkGL_after_moves", "checkGL_after_tmove"});
parameter_set.add(measure_imbalance_str, "measure_imbalance", {"no", "yes"});
OhmmsAttributeSet aAttrib;
// first stage in from QMCDriverFactory
@ -139,6 +141,9 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
debug_checks_ |= DriverDebugChecks::CHECKGL_AFTER_TMOVE;
}
if (measure_imbalance_str == "yes")
measure_imbalance_ = true;
if (check_point_period_.period < 1)
check_point_period_.period = max_blocks_;
}

View File

@ -31,8 +31,8 @@ public:
void readXML(xmlNodePtr cur);
// To allow compile check if move constructor is still implicit
QMCDriverInput() = default;
QMCDriverInput(const QMCDriverInput&) = default;
QMCDriverInput() = default;
QMCDriverInput(const QMCDriverInput&) = default;
QMCDriverInput& operator=(const QMCDriverInput&) = default;
QMCDriverInput(QMCDriverInput&&) noexcept;
QMCDriverInput& operator=(QMCDriverInput&&) noexcept;
@ -41,6 +41,9 @@ protected:
bool scoped_profiling_ = false;
/// determine additional checks for debugging purpose
DriverDebugChecks debug_checks_ = DriverDebugChecks::ALL_OFF;
/// measure load imbalance (add a barrier) before data aggregation (obvious synchronization)
bool measure_imbalance_ = false;
/** @ingroup Input Parameters for QMCDriver base class
* @{
* All input determined variables should be here
@ -128,13 +131,14 @@ public:
DriverDebugChecks get_debug_checks() const { return debug_checks_; }
bool get_scoped_profiling() const { return scoped_profiling_; }
bool are_walkers_serialized() const { return crowd_serialize_walkers_; }
bool get_measure_imbalance() const { return measure_imbalance_; }
const std::string get_drift_modifier() const { return drift_modifier_; }
RealType get_drift_modifier_unr_a() const { return drift_modifier_unr_a_; }
};
// These will cause a compiler error if the implicit move constructor has been broken
inline QMCDriverInput::QMCDriverInput(QMCDriverInput&&) noexcept = default;
inline QMCDriverInput::QMCDriverInput(QMCDriverInput&&) noexcept = default;
inline QMCDriverInput& QMCDriverInput::operator=(QMCDriverInput&&) noexcept = default;
} // namespace qmcplusplus

View File

@ -31,6 +31,7 @@
#include "Concurrency/Info.hpp"
#include "QMCDrivers/GreenFunctionModifiers/DriftModifierBuilder.h"
#include "Utilities/StlPrettyPrint.hpp"
#include "Utilities/Timer.h"
#include "Message/UniformCommunicateError.h"
namespace qmcplusplus
@ -122,6 +123,8 @@ void QMCDriverNew::checkNumCrowdsLTNumThreads(const int num_crowds)
*/
void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCounts& awc)
{
ScopedTimer local_timer(timers_.startup_timer);
app_summary() << QMCType << " Driver running with" << std::endl
<< " total_walkers = " << awc.global_walkers << std::endl
<< " walkers_per_rank = " << awc.walkers_per_rank << std::endl
@ -159,6 +162,9 @@ void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCou
// Once they are created move contexts can be created.
createRngsStepContexts(crowds_.size());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Startup");
}
/** QMCDriverNew ignores h5name; if you want to read an h5 config you have to explicitly
@ -218,9 +224,8 @@ void QMCDriverNew::recordBlock(int block)
{
if (qmcdriver_input_.get_dump_config() && block % qmcdriver_input_.get_check_point_period().period == 0)
{
timers_.checkpoint_timer.start();
ScopedTimer local_timer(timers_.checkpoint_timer);
RandomNumberControl::write(root_name_, myComm);
timers_.checkpoint_timer.stop();
}
}
@ -234,8 +239,7 @@ bool QMCDriverNew::finalize(int block, bool dumpwalkers)
return true;
}
void QMCDriverNew::makeLocalWalkers(IndexType nwalkers,
RealType reserve)
void QMCDriverNew::makeLocalWalkers(IndexType nwalkers, RealType reserve)
{
ScopedTimer local_timer(timers_.create_walkers_timer);
// ensure nwalkers local walkers in population_
@ -462,6 +466,7 @@ QMCDriverNew::AdjustedWalkerCounts QMCDriverNew::adjustGlobalWalkerCount(int num
*/
void QMCDriverNew::endBlock()
{
ScopedTimer local_timer(timers_.endblock_timer);
RefVector<ScalarEstimatorBase> all_scalar_estimators;
FullPrecRealType total_block_weight = 0.0;
@ -570,4 +575,28 @@ void QMCDriverNew::checkLogAndGL(Crowd& crowd, const std::string_view location)
throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n"));
}
void QMCDriverNew::measureImbalance(const std::string& tag) const
{
ScopedTimer local_timer(timers_.imbalance_timer);
Timer only_this_barrier;
myComm->barrier();
std::vector<double> my_barrier_time(1, only_this_barrier.elapsed());
std::vector<double> barrier_time_all_ranks(myComm->size(), 0.0);
myComm->gather(my_barrier_time, barrier_time_all_ranks, 0);
if (!myComm->rank())
{
auto const count = static_cast<double>(barrier_time_all_ranks.size());
const auto max_it = std::max_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
const auto min_it = std::min_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
app_log() << std::endl
<< tag << " imbalance (slow ranks wait less):" << std::endl
<< " average wait seconds = "
<< std::accumulate(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end(), 0.0) / count << std::endl
<< " min wait at rank " << std::distance(barrier_time_all_ranks.begin(), min_it)
<< ", seconds = " << *min_it << std::endl
<< " max wait at rank " << std::distance(barrier_time_all_ranks.begin(), max_it)
<< ", seconds = " << *max_it << std::endl;
}
}
} // namespace qmcplusplus

View File

@ -102,6 +102,9 @@ public:
std::bitset<QMC_MODE_MAX> qmc_driver_mode_;
protected:
/// inject additional barrier and measure load imbalance.
void measureImbalance(const std::string& tag) const;
/// end of a block operations. Aggregates statistics across all MPI ranks and write to disk.
void endBlock();
/** This is a data structure strictly for QMCDriver and its derived classes
*
@ -325,6 +328,10 @@ protected:
NewTimer& hamiltonian_timer;
NewTimer& collectables_timer;
NewTimer& estimators_timer;
NewTimer& imbalance_timer;
NewTimer& endblock_timer;
NewTimer& startup_timer;
NewTimer& production_timer;
NewTimer& resource_timer;
DriverTimers(const std::string& prefix)
: checkpoint_timer(*timer_manager.createTimer(prefix + "CheckPoint", timer_level_medium)),
@ -336,6 +343,10 @@ protected:
hamiltonian_timer(*timer_manager.createTimer(prefix + "Hamiltonian", timer_level_medium)),
collectables_timer(*timer_manager.createTimer(prefix + "Collectables", timer_level_medium)),
estimators_timer(*timer_manager.createTimer(prefix + "Estimators", timer_level_medium)),
imbalance_timer(*timer_manager.createTimer(prefix + "Imbalance", timer_level_medium)),
endblock_timer(*timer_manager.createTimer(prefix + "BlockEndDataAggregation", timer_level_medium)),
startup_timer(*timer_manager.createTimer(prefix + "Startup", timer_level_medium)),
production_timer(*timer_manager.createTimer(prefix + "Production", timer_level_medium)),
resource_timer(*timer_manager.createTimer(prefix + "Resources", timer_level_medium))
{}
};

View File

@ -66,7 +66,7 @@ void VMCBatched::advanceWalkers(const StateForThread& sft,
timers.movepbyp_timer.start();
const int num_walkers = crowd.size();
auto& walker_leader = walker_elecs.getLeader();
auto& walker_leader = walker_elecs.getLeader();
const int num_particles = walker_leader.getTotalNum();
// Note std::vector<bool> is not like the rest of stl.
std::vector<bool> moved(num_walkers, false);
@ -299,10 +299,12 @@ bool VMCBatched::run()
ScopedTimer local_timer(timers_.init_walkers_timer);
ParallelExecutor<> section_start_task;
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
print_mem("VMCBatched after initialLogEvaluation", app_summary());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("InitialLogEvaluation");
}
print_mem("VMCBatched after initialLogEvaluation", app_summary());
ScopedTimer local_timer(timers_.production_timer);
ParallelExecutor<> crowd_task;
if (qmcdriver_input_.get_warmup_steps() > 0)
@ -331,8 +333,13 @@ bool VMCBatched::run()
app_log() << "Warm-up is completed!" << std::endl;
print_mem("VMCBatched after Warmup", app_log());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Warmup");
}
// this barrier fences all previous load imbalance. Avoid block 0 timing pollution.
myComm->barrier();
for (int block = 0; block < num_blocks; ++block)
{
vmc_loop.start();
@ -362,6 +369,8 @@ bool VMCBatched::run()
}
}
print_mem("VMCBatched after a block", app_debug_stream());
if (qmcdriver_input_.get_measure_imbalance())
measureImbalance("Block " + std::to_string(block));
endBlock();
vmc_loop.stop();