mirror of https://github.com/QMCPACK/qmcpack.git
Merge pull request #3985 from ye-luo/measure-load-imbalance
Add a batched driver option to measure imbalance
This commit is contained in:
commit
6f03d10495
152
docs/methods.rst
152
docs/methods.rst
|
@ -316,39 +316,41 @@ Batched ``vmc`` driver (experimental)
|
||||||
|
|
||||||
parameters:
|
parameters:
|
||||||
|
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
|
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
|
||||||
+================================+==============+=========================+=============+===============================================+
|
+================================+==============+=========================+=============+=================================================+
|
||||||
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
|
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
|
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds |
|
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
|
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
|
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
|
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``substeps`` | integer | :math:`\geq 0` | 1 | Number of substeps per step |
|
| ``substeps`` | integer | :math:`\geq 0` | 1 | Number of substeps per step |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``usedrift`` | text | yes,no | yes | Use the algorithm with drift |
|
| ``usedrift`` | text | yes,no | yes | Use the algorithm with drift |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
|
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``samples`` (not ready) | integer | :math:`\geq 0` | 0 | Number of walker samples for this VMC run |
|
| ``samples`` (not ready) | integer | :math:`\geq 0` | 0 | Number of walker samples for this VMC run |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``storeconfigs`` (not ready) | integer | all values | 0 | Write configurations to files |
|
| ``storeconfigs`` (not ready) | integer | all values | 0 | Write configurations to files |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
|
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``crowd_serialize_walkers`` | text | yes, no | no | Force use of single walker APIs (for testing) |
|
| ``crowd_serialize_walkers`` | text | yes, no | no | Force use of single walker APIs (for testing) |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
|
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
|
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
|
| ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block |
|
||||||
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
|
|
||||||
|
|
||||||
Additional information:
|
Additional information:
|
||||||
|
@ -1566,47 +1568,49 @@ Batched ``dmc`` driver (experimental)
|
||||||
|
|
||||||
parameters:
|
parameters:
|
||||||
|
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
|
| **Name** | **Datatype** | **Values** | **Default** | **Description** |
|
||||||
+================================+==============+=========================+=============+===============================================+
|
+================================+==============+=========================+=============+=================================================+
|
||||||
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
|
| ``total_walkers`` | integer | :math:`> 0` | 1 | Total number of walkers over all MPI ranks |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
|
| ``walkers_per_rank`` | integer | :math:`> 0` | 1 | Number of walkers per MPI rank |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds |
|
| ``crowds`` | integer | :math:`> 0` | dep. | Number of desynchronized walker crowds |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
|
| ``blocks`` | integer | :math:`\geq 0` | 1 | Number of blocks |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
|
| ``steps`` | integer | :math:`\geq 0` | 1 | Number of steps per block |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
|
| ``warmupsteps`` | integer | :math:`\geq 0` | 0 | Number of steps for warming up |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
|
| ``timestep`` | real | :math:`> 0` | 0.1 | Time step for each electron move |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``nonlocalmoves`` | string | yes, no, v0, v1, v3 | no | Run with T-moves |
|
| ``nonlocalmoves`` | string | yes, no, v0, v1, v3 | no | Run with T-moves |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``branching_cutoff_scheme`` | string | classic/DRV/ZSGMA/YL | classic | Branch cutoff scheme |
|
| ``branching_cutoff_scheme`` | string | classic/DRV/ZSGMA/YL | classic | Branch cutoff scheme |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
|
| ``blocks_between_recompute`` | integer | :math:`\geq 0` | dep. | Wavefunction recompute frequency |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``feedback`` | double | :math:`\geq 0` | 1.0 | Population feedback on the trial energy |
|
| ``feedback`` | double | :math:`\geq 0` | 1.0 | Population feedback on the trial energy |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``sigmaBound`` | 10 | :math:`\geq 0` | 10 | Parameter to cutoff large weights |
|
| ``sigmaBound`` | 10 | :math:`\geq 0` | 10 | Parameter to cutoff large weights |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``reconfiguration`` | string | yes/pure/other | no | Fixed population technique |
|
| ``reconfiguration`` | string | yes/pure/other | no | Fixed population technique |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``storeconfigs`` | integer | all values | 0 | Store configurations |
|
| ``storeconfigs`` | integer | all values | 0 | Store configurations |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``use_nonblocking`` | string | yes/no | yes | Using nonblocking send/recv |
|
| ``use_nonblocking`` | string | yes/no | yes | Using nonblocking send/recv |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``debug_disable_branching`` | string | yes/no | no | Disable branching for debugging |
|
| ``debug_disable_branching`` | string | yes/no | no | Disable branching for debugging |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``crowd_serialize_walkers`` | text | yes, no | no | Force use of single walker APIs (for testing) |
|
| ``crowd_serialize_walkers`` | text | yes, no | no | Force use of single walker APIs (for testing) |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
|
| ``debug_checks`` | text | see additional info | dep. | Turn on/off additional recompute and checks |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
|
| ``spin_mass`` | real | :math:`\geq 0` | 1.0 | Effective mass for spin sampling |
|
||||||
+--------------------------------+--------------+-------------------------+-------------+-----------------------------------------------+
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
|
| ``measure_imbalance`` | text | yes,no | no | Measure load imbalance at the end of each block |
|
||||||
|
+--------------------------------+--------------+-------------------------+-------------+-------------------------------------------------+
|
||||||
|
|
||||||
|
|
||||||
- ``crowds`` The number of crowds that the walkers are subdivided into on each MPI rank. If not provided, it is set equal to the number of OpenMP threads.
|
- ``crowds`` The number of crowds that the walkers are subdivided into on each MPI rank. If not provided, it is set equal to the number of OpenMP threads.
|
||||||
|
|
|
@ -103,7 +103,7 @@ void DMCBatched::advanceWalkers(const StateForThread& sft,
|
||||||
}
|
}
|
||||||
|
|
||||||
const int num_walkers = crowd.size();
|
const int num_walkers = crowd.size();
|
||||||
auto& pset_leader = walker_elecs.getLeader();
|
auto& pset_leader = walker_elecs.getLeader();
|
||||||
const int num_particles = pset_leader.getTotalNum();
|
const int num_particles = pset_leader.getTotalNum();
|
||||||
|
|
||||||
MCCoords<CT> drifts(num_walkers), drifts_reverse(num_walkers);
|
MCCoords<CT> drifts(num_walkers), drifts_reverse(num_walkers);
|
||||||
|
@ -428,18 +428,22 @@ bool DMCBatched::run()
|
||||||
ScopedTimer local_timer(timers_.init_walkers_timer);
|
ScopedTimer local_timer(timers_.init_walkers_timer);
|
||||||
ParallelExecutor<> section_start_task;
|
ParallelExecutor<> section_start_task;
|
||||||
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
|
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
|
||||||
}
|
|
||||||
|
|
||||||
print_mem("DMCBatched after initialLogEvaluation", app_summary());
|
|
||||||
|
|
||||||
{
|
|
||||||
FullPrecRealType energy, variance;
|
FullPrecRealType energy, variance;
|
||||||
population_.measureGlobalEnergyVariance(*myComm, energy, variance);
|
population_.measureGlobalEnergyVariance(*myComm, energy, variance);
|
||||||
// false indicates we do not support kill at node crossings.
|
// false indicates we do not support kill at node crossings.
|
||||||
branch_engine_->initParam(population_, energy, variance, dmcdriver_input_.get_reconfiguration(), false);
|
branch_engine_->initParam(population_, energy, variance, dmcdriver_input_.get_reconfiguration(), false);
|
||||||
walker_controller_->setTrialEnergy(branch_engine_->getEtrial());
|
walker_controller_->setTrialEnergy(branch_engine_->getEtrial());
|
||||||
|
|
||||||
|
print_mem("DMCBatched after initialLogEvaluation", app_summary());
|
||||||
|
if (qmcdriver_input_.get_measure_imbalance())
|
||||||
|
measureImbalance("InitialLogEvaluation");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this barrier fences all previous load imbalance. Avoid block 0 timing pollution.
|
||||||
|
myComm->barrier();
|
||||||
|
|
||||||
|
ScopedTimer local_timer(timers_.production_timer);
|
||||||
ParallelExecutor<> crowd_task;
|
ParallelExecutor<> crowd_task;
|
||||||
|
|
||||||
for (int block = 0; block < num_blocks; ++block)
|
for (int block = 0; block < num_blocks; ++block)
|
||||||
|
@ -475,6 +479,8 @@ bool DMCBatched::run()
|
||||||
population_.redistributeWalkers(crowds_);
|
population_.redistributeWalkers(crowds_);
|
||||||
}
|
}
|
||||||
print_mem("DMCBatched after a block", app_debug_stream());
|
print_mem("DMCBatched after a block", app_debug_stream());
|
||||||
|
if (qmcdriver_input_.get_measure_imbalance())
|
||||||
|
measureImbalance("Block " + std::to_string(block));
|
||||||
endBlock();
|
endBlock();
|
||||||
dmc_loop.stop();
|
dmc_loop.stop();
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,7 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
|
||||||
|
|
||||||
std::string serialize_walkers;
|
std::string serialize_walkers;
|
||||||
std::string debug_checks_str;
|
std::string debug_checks_str;
|
||||||
|
std::string measure_imbalance_str;
|
||||||
|
|
||||||
ParameterSet parameter_set;
|
ParameterSet parameter_set;
|
||||||
parameter_set.add(store_config_period_, "storeconfigs");
|
parameter_set.add(store_config_period_, "storeconfigs");
|
||||||
|
@ -70,6 +71,7 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
|
||||||
parameter_set.add(max_disp_sq_, "maxDisplSq");
|
parameter_set.add(max_disp_sq_, "maxDisplSq");
|
||||||
parameter_set.add(debug_checks_str, "debug_checks",
|
parameter_set.add(debug_checks_str, "debug_checks",
|
||||||
{"no", "all", "checkGL_after_load", "checkGL_after_moves", "checkGL_after_tmove"});
|
{"no", "all", "checkGL_after_load", "checkGL_after_moves", "checkGL_after_tmove"});
|
||||||
|
parameter_set.add(measure_imbalance_str, "measure_imbalance", {"no", "yes"});
|
||||||
|
|
||||||
OhmmsAttributeSet aAttrib;
|
OhmmsAttributeSet aAttrib;
|
||||||
// first stage in from QMCDriverFactory
|
// first stage in from QMCDriverFactory
|
||||||
|
@ -139,6 +141,9 @@ void QMCDriverInput::readXML(xmlNodePtr cur)
|
||||||
debug_checks_ |= DriverDebugChecks::CHECKGL_AFTER_TMOVE;
|
debug_checks_ |= DriverDebugChecks::CHECKGL_AFTER_TMOVE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (measure_imbalance_str == "yes")
|
||||||
|
measure_imbalance_ = true;
|
||||||
|
|
||||||
if (check_point_period_.period < 1)
|
if (check_point_period_.period < 1)
|
||||||
check_point_period_.period = max_blocks_;
|
check_point_period_.period = max_blocks_;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,8 +31,8 @@ public:
|
||||||
void readXML(xmlNodePtr cur);
|
void readXML(xmlNodePtr cur);
|
||||||
|
|
||||||
// To allow compile check if move constructor is still implicit
|
// To allow compile check if move constructor is still implicit
|
||||||
QMCDriverInput() = default;
|
QMCDriverInput() = default;
|
||||||
QMCDriverInput(const QMCDriverInput&) = default;
|
QMCDriverInput(const QMCDriverInput&) = default;
|
||||||
QMCDriverInput& operator=(const QMCDriverInput&) = default;
|
QMCDriverInput& operator=(const QMCDriverInput&) = default;
|
||||||
QMCDriverInput(QMCDriverInput&&) noexcept;
|
QMCDriverInput(QMCDriverInput&&) noexcept;
|
||||||
QMCDriverInput& operator=(QMCDriverInput&&) noexcept;
|
QMCDriverInput& operator=(QMCDriverInput&&) noexcept;
|
||||||
|
@ -41,6 +41,9 @@ protected:
|
||||||
bool scoped_profiling_ = false;
|
bool scoped_profiling_ = false;
|
||||||
/// determine additional checks for debugging purpose
|
/// determine additional checks for debugging purpose
|
||||||
DriverDebugChecks debug_checks_ = DriverDebugChecks::ALL_OFF;
|
DriverDebugChecks debug_checks_ = DriverDebugChecks::ALL_OFF;
|
||||||
|
/// measure load imbalance (add a barrier) before data aggregation (obvious synchronization)
|
||||||
|
bool measure_imbalance_ = false;
|
||||||
|
|
||||||
/** @ingroup Input Parameters for QMCDriver base class
|
/** @ingroup Input Parameters for QMCDriver base class
|
||||||
* @{
|
* @{
|
||||||
* All input determined variables should be here
|
* All input determined variables should be here
|
||||||
|
@ -128,13 +131,14 @@ public:
|
||||||
DriverDebugChecks get_debug_checks() const { return debug_checks_; }
|
DriverDebugChecks get_debug_checks() const { return debug_checks_; }
|
||||||
bool get_scoped_profiling() const { return scoped_profiling_; }
|
bool get_scoped_profiling() const { return scoped_profiling_; }
|
||||||
bool are_walkers_serialized() const { return crowd_serialize_walkers_; }
|
bool are_walkers_serialized() const { return crowd_serialize_walkers_; }
|
||||||
|
bool get_measure_imbalance() const { return measure_imbalance_; }
|
||||||
|
|
||||||
const std::string get_drift_modifier() const { return drift_modifier_; }
|
const std::string get_drift_modifier() const { return drift_modifier_; }
|
||||||
RealType get_drift_modifier_unr_a() const { return drift_modifier_unr_a_; }
|
RealType get_drift_modifier_unr_a() const { return drift_modifier_unr_a_; }
|
||||||
};
|
};
|
||||||
|
|
||||||
// These will cause a compiler error if the implicit move constructor has been broken
|
// These will cause a compiler error if the implicit move constructor has been broken
|
||||||
inline QMCDriverInput::QMCDriverInput(QMCDriverInput&&) noexcept = default;
|
inline QMCDriverInput::QMCDriverInput(QMCDriverInput&&) noexcept = default;
|
||||||
inline QMCDriverInput& QMCDriverInput::operator=(QMCDriverInput&&) noexcept = default;
|
inline QMCDriverInput& QMCDriverInput::operator=(QMCDriverInput&&) noexcept = default;
|
||||||
|
|
||||||
} // namespace qmcplusplus
|
} // namespace qmcplusplus
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include "Concurrency/Info.hpp"
|
#include "Concurrency/Info.hpp"
|
||||||
#include "QMCDrivers/GreenFunctionModifiers/DriftModifierBuilder.h"
|
#include "QMCDrivers/GreenFunctionModifiers/DriftModifierBuilder.h"
|
||||||
#include "Utilities/StlPrettyPrint.hpp"
|
#include "Utilities/StlPrettyPrint.hpp"
|
||||||
|
#include "Utilities/Timer.h"
|
||||||
#include "Message/UniformCommunicateError.h"
|
#include "Message/UniformCommunicateError.h"
|
||||||
|
|
||||||
namespace qmcplusplus
|
namespace qmcplusplus
|
||||||
|
@ -122,6 +123,8 @@ void QMCDriverNew::checkNumCrowdsLTNumThreads(const int num_crowds)
|
||||||
*/
|
*/
|
||||||
void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCounts& awc)
|
void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCounts& awc)
|
||||||
{
|
{
|
||||||
|
ScopedTimer local_timer(timers_.startup_timer);
|
||||||
|
|
||||||
app_summary() << QMCType << " Driver running with" << std::endl
|
app_summary() << QMCType << " Driver running with" << std::endl
|
||||||
<< " total_walkers = " << awc.global_walkers << std::endl
|
<< " total_walkers = " << awc.global_walkers << std::endl
|
||||||
<< " walkers_per_rank = " << awc.walkers_per_rank << std::endl
|
<< " walkers_per_rank = " << awc.walkers_per_rank << std::endl
|
||||||
|
@ -159,6 +162,9 @@ void QMCDriverNew::startup(xmlNodePtr cur, const QMCDriverNew::AdjustedWalkerCou
|
||||||
|
|
||||||
// Once they are created move contexts can be created.
|
// Once they are created move contexts can be created.
|
||||||
createRngsStepContexts(crowds_.size());
|
createRngsStepContexts(crowds_.size());
|
||||||
|
|
||||||
|
if (qmcdriver_input_.get_measure_imbalance())
|
||||||
|
measureImbalance("Startup");
|
||||||
}
|
}
|
||||||
|
|
||||||
/** QMCDriverNew ignores h5name; if you want to read an h5 config you have to explicitly
|
/** QMCDriverNew ignores h5name; if you want to read an h5 config you have to explicitly
|
||||||
|
@ -218,9 +224,8 @@ void QMCDriverNew::recordBlock(int block)
|
||||||
{
|
{
|
||||||
if (qmcdriver_input_.get_dump_config() && block % qmcdriver_input_.get_check_point_period().period == 0)
|
if (qmcdriver_input_.get_dump_config() && block % qmcdriver_input_.get_check_point_period().period == 0)
|
||||||
{
|
{
|
||||||
timers_.checkpoint_timer.start();
|
ScopedTimer local_timer(timers_.checkpoint_timer);
|
||||||
RandomNumberControl::write(root_name_, myComm);
|
RandomNumberControl::write(root_name_, myComm);
|
||||||
timers_.checkpoint_timer.stop();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -234,8 +239,7 @@ bool QMCDriverNew::finalize(int block, bool dumpwalkers)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void QMCDriverNew::makeLocalWalkers(IndexType nwalkers,
|
void QMCDriverNew::makeLocalWalkers(IndexType nwalkers, RealType reserve)
|
||||||
RealType reserve)
|
|
||||||
{
|
{
|
||||||
ScopedTimer local_timer(timers_.create_walkers_timer);
|
ScopedTimer local_timer(timers_.create_walkers_timer);
|
||||||
// ensure nwalkers local walkers in population_
|
// ensure nwalkers local walkers in population_
|
||||||
|
@ -462,6 +466,7 @@ QMCDriverNew::AdjustedWalkerCounts QMCDriverNew::adjustGlobalWalkerCount(int num
|
||||||
*/
|
*/
|
||||||
void QMCDriverNew::endBlock()
|
void QMCDriverNew::endBlock()
|
||||||
{
|
{
|
||||||
|
ScopedTimer local_timer(timers_.endblock_timer);
|
||||||
RefVector<ScalarEstimatorBase> all_scalar_estimators;
|
RefVector<ScalarEstimatorBase> all_scalar_estimators;
|
||||||
|
|
||||||
FullPrecRealType total_block_weight = 0.0;
|
FullPrecRealType total_block_weight = 0.0;
|
||||||
|
@ -570,4 +575,28 @@ void QMCDriverNew::checkLogAndGL(Crowd& crowd, const std::string_view location)
|
||||||
throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n"));
|
throw std::runtime_error(std::string("checkLogAndGL failed at ") + std::string(location) + std::string("\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Inject one extra communicator barrier, time how long this rank spends in it, gather
/// the per-rank wait times on rank 0 and print their average, minimum and maximum.
/// @param tag label printed in front of the report to identify the measurement point.
void QMCDriverNew::measureImbalance(const std::string& tag) const
|
||||||
|
{
|
||||||
|
// The whole measurement, including the gather and the printout, is charged to imbalance_timer.
ScopedTimer local_timer(timers_.imbalance_timer);
|
||||||
|
// Separate timer whose elapsed() is read right after the barrier returns
// (presumably it starts counting at construction -- TODO confirm against Utilities/Timer.h).
Timer only_this_barrier;
|
||||||
|
myComm->barrier();
|
||||||
|
// One-element send buffer holding this rank's wait time.
std::vector<double> my_barrier_time(1, only_this_barrier.elapsed());
|
||||||
|
// Receive buffer with one slot per rank in the communicator.
std::vector<double> barrier_time_all_ranks(myComm->size(), 0.0);
|
||||||
|
// Gather with root 0.
myComm->gather(my_barrier_time, barrier_time_all_ranks, 0);
|
||||||
|
// Only rank 0 writes the report.
if (!myComm->rank())
|
||||||
|
{
|
||||||
|
auto const count = static_cast<double>(barrier_time_all_ranks.size());
|
||||||
|
const auto max_it = std::max_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
|
||||||
|
const auto min_it = std::min_element(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end());
|
||||||
|
// The index of each extreme within the gathered vector is printed as its rank.
app_log() << std::endl
|
||||||
|
<< tag << " imbalance (slow ranks wait less):" << std::endl
|
||||||
|
<< " average wait seconds = "
|
||||||
|
<< std::accumulate(barrier_time_all_ranks.begin(), barrier_time_all_ranks.end(), 0.0) / count << std::endl
|
||||||
|
<< " min wait at rank " << std::distance(barrier_time_all_ranks.begin(), min_it)
|
||||||
|
<< ", seconds = " << *min_it << std::endl
|
||||||
|
<< " max wait at rank " << std::distance(barrier_time_all_ranks.begin(), max_it)
|
||||||
|
<< ", seconds = " << *max_it << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace qmcplusplus
|
} // namespace qmcplusplus
|
||||||
|
|
|
@ -102,6 +102,9 @@ public:
|
||||||
std::bitset<QMC_MODE_MAX> qmc_driver_mode_;
|
std::bitset<QMC_MODE_MAX> qmc_driver_mode_;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
/// inject additional barrier and measure load imbalance.
|
||||||
|
void measureImbalance(const std::string& tag) const;
|
||||||
|
/// end of a block operations. Aggregates statistics across all MPI ranks and write to disk.
|
||||||
void endBlock();
|
void endBlock();
|
||||||
/** This is a data structure strictly for QMCDriver and its derived classes
|
/** This is a data structure strictly for QMCDriver and its derived classes
|
||||||
*
|
*
|
||||||
|
@ -325,6 +328,10 @@ protected:
|
||||||
NewTimer& hamiltonian_timer;
|
NewTimer& hamiltonian_timer;
|
||||||
NewTimer& collectables_timer;
|
NewTimer& collectables_timer;
|
||||||
NewTimer& estimators_timer;
|
NewTimer& estimators_timer;
|
||||||
|
NewTimer& imbalance_timer;
|
||||||
|
NewTimer& endblock_timer;
|
||||||
|
NewTimer& startup_timer;
|
||||||
|
NewTimer& production_timer;
|
||||||
NewTimer& resource_timer;
|
NewTimer& resource_timer;
|
||||||
DriverTimers(const std::string& prefix)
|
DriverTimers(const std::string& prefix)
|
||||||
: checkpoint_timer(*timer_manager.createTimer(prefix + "CheckPoint", timer_level_medium)),
|
: checkpoint_timer(*timer_manager.createTimer(prefix + "CheckPoint", timer_level_medium)),
|
||||||
|
@ -336,6 +343,10 @@ protected:
|
||||||
hamiltonian_timer(*timer_manager.createTimer(prefix + "Hamiltonian", timer_level_medium)),
|
hamiltonian_timer(*timer_manager.createTimer(prefix + "Hamiltonian", timer_level_medium)),
|
||||||
collectables_timer(*timer_manager.createTimer(prefix + "Collectables", timer_level_medium)),
|
collectables_timer(*timer_manager.createTimer(prefix + "Collectables", timer_level_medium)),
|
||||||
estimators_timer(*timer_manager.createTimer(prefix + "Estimators", timer_level_medium)),
|
estimators_timer(*timer_manager.createTimer(prefix + "Estimators", timer_level_medium)),
|
||||||
|
imbalance_timer(*timer_manager.createTimer(prefix + "Imbalance", timer_level_medium)),
|
||||||
|
endblock_timer(*timer_manager.createTimer(prefix + "BlockEndDataAggregation", timer_level_medium)),
|
||||||
|
startup_timer(*timer_manager.createTimer(prefix + "Startup", timer_level_medium)),
|
||||||
|
production_timer(*timer_manager.createTimer(prefix + "Production", timer_level_medium)),
|
||||||
resource_timer(*timer_manager.createTimer(prefix + "Resources", timer_level_medium))
|
resource_timer(*timer_manager.createTimer(prefix + "Resources", timer_level_medium))
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
|
@ -66,7 +66,7 @@ void VMCBatched::advanceWalkers(const StateForThread& sft,
|
||||||
|
|
||||||
timers.movepbyp_timer.start();
|
timers.movepbyp_timer.start();
|
||||||
const int num_walkers = crowd.size();
|
const int num_walkers = crowd.size();
|
||||||
auto& walker_leader = walker_elecs.getLeader();
|
auto& walker_leader = walker_elecs.getLeader();
|
||||||
const int num_particles = walker_leader.getTotalNum();
|
const int num_particles = walker_leader.getTotalNum();
|
||||||
// Note std::vector<bool> is not like the rest of stl.
|
// Note std::vector<bool> is not like the rest of stl.
|
||||||
std::vector<bool> moved(num_walkers, false);
|
std::vector<bool> moved(num_walkers, false);
|
||||||
|
@ -299,10 +299,12 @@ bool VMCBatched::run()
|
||||||
ScopedTimer local_timer(timers_.init_walkers_timer);
|
ScopedTimer local_timer(timers_.init_walkers_timer);
|
||||||
ParallelExecutor<> section_start_task;
|
ParallelExecutor<> section_start_task;
|
||||||
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
|
section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
|
||||||
|
print_mem("VMCBatched after initialLogEvaluation", app_summary());
|
||||||
|
if (qmcdriver_input_.get_measure_imbalance())
|
||||||
|
measureImbalance("InitialLogEvaluation");
|
||||||
}
|
}
|
||||||
|
|
||||||
print_mem("VMCBatched after initialLogEvaluation", app_summary());
|
ScopedTimer local_timer(timers_.production_timer);
|
||||||
|
|
||||||
ParallelExecutor<> crowd_task;
|
ParallelExecutor<> crowd_task;
|
||||||
|
|
||||||
if (qmcdriver_input_.get_warmup_steps() > 0)
|
if (qmcdriver_input_.get_warmup_steps() > 0)
|
||||||
|
@ -331,8 +333,13 @@ bool VMCBatched::run()
|
||||||
|
|
||||||
app_log() << "Warm-up is completed!" << std::endl;
|
app_log() << "Warm-up is completed!" << std::endl;
|
||||||
print_mem("VMCBatched after Warmup", app_log());
|
print_mem("VMCBatched after Warmup", app_log());
|
||||||
|
if (qmcdriver_input_.get_measure_imbalance())
|
||||||
|
measureImbalance("Warmup");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this barrier fences all previous load imbalance. Avoid block 0 timing pollution.
|
||||||
|
myComm->barrier();
|
||||||
|
|
||||||
for (int block = 0; block < num_blocks; ++block)
|
for (int block = 0; block < num_blocks; ++block)
|
||||||
{
|
{
|
||||||
vmc_loop.start();
|
vmc_loop.start();
|
||||||
|
@ -362,6 +369,8 @@ bool VMCBatched::run()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
print_mem("VMCBatched after a block", app_debug_stream());
|
print_mem("VMCBatched after a block", app_debug_stream());
|
||||||
|
if (qmcdriver_input_.get_measure_imbalance())
|
||||||
|
measureImbalance("Block " + std::to_string(block));
|
||||||
endBlock();
|
endBlock();
|
||||||
vmc_loop.stop();
|
vmc_loop.stop();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue