mirror of https://github.com/QMCPACK/qmcpack.git
Traces throttling. Preliminary, please test before trusting.
git-svn-id: https://subversion.assembla.com/svn/qmcdev/trunk@6367 e5b18d87-469d-4833-9cc0-8cdfa06e9491
This commit is contained in:
parent
e63019fdee
commit
74ff608843
|
@ -1219,6 +1219,12 @@ struct TraceBuffer
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline bool empty()
|
||||||
|
{
|
||||||
|
return buffer.size(0)==0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void set_samples(TraceSamples<T>& s)
|
inline void set_samples(TraceSamples<T>& s)
|
||||||
{
|
{
|
||||||
samples = &s;
|
samples = &s;
|
||||||
|
@ -1569,6 +1575,7 @@ public:
|
||||||
bool method_allows_traces;
|
bool method_allows_traces;
|
||||||
bool streaming_traces;
|
bool streaming_traces;
|
||||||
bool writing_traces;
|
bool writing_traces;
|
||||||
|
int buffer_steps; //number of steps to buffer data between writes
|
||||||
bool verbose;
|
bool verbose;
|
||||||
string format;
|
string format;
|
||||||
bool hdf_format;
|
bool hdf_format;
|
||||||
|
@ -1587,6 +1594,7 @@ public:
|
||||||
reset_permissions();
|
reset_permissions();
|
||||||
master_copy = true;
|
master_copy = true;
|
||||||
communicator = comm;
|
communicator = comm;
|
||||||
|
buffer_steps = 10;
|
||||||
format = "hdf";
|
format = "hdf";
|
||||||
default_domain = "scalars";
|
default_domain = "scalars";
|
||||||
request.set_scalar_domain(default_domain);
|
request.set_scalar_domain(default_domain);
|
||||||
|
@ -1622,6 +1630,7 @@ public:
|
||||||
request = tm.request;
|
request = tm.request;
|
||||||
streaming_traces = tm.streaming_traces;
|
streaming_traces = tm.streaming_traces;
|
||||||
writing_traces = tm.writing_traces;
|
writing_traces = tm.writing_traces;
|
||||||
|
buffer_steps = tm.buffer_steps;
|
||||||
verbose = tm.verbose;
|
verbose = tm.verbose;
|
||||||
format = tm.format;
|
format = tm.format;
|
||||||
hdf_format = tm.hdf_format;
|
hdf_format = tm.hdf_format;
|
||||||
|
@ -1687,6 +1696,7 @@ public:
|
||||||
attrib.add(verbose_write, "verbose" );
|
attrib.add(verbose_write, "verbose" );
|
||||||
attrib.add(array, "particle" );//legacy
|
attrib.add(array, "particle" );//legacy
|
||||||
attrib.add(array_defaults, "particle_defaults" );//legacy
|
attrib.add(array_defaults, "particle_defaults" );//legacy
|
||||||
|
attrib.add(buffer_steps, "buffer" );//legacy
|
||||||
attrib.put(cur);
|
attrib.put(cur);
|
||||||
writing_traces = writing == "yes";
|
writing_traces = writing == "yes";
|
||||||
bool scalars_on = scalar == "yes";
|
bool scalars_on = scalar == "yes";
|
||||||
|
@ -1694,6 +1704,8 @@ public:
|
||||||
bool use_scalar_defaults = scalar_defaults == "yes";
|
bool use_scalar_defaults = scalar_defaults == "yes";
|
||||||
bool use_array_defaults = array_defaults == "yes";
|
bool use_array_defaults = array_defaults == "yes";
|
||||||
verbose = verbose_write == "yes";
|
verbose = verbose_write == "yes";
|
||||||
|
if(buffer_steps<1)
|
||||||
|
APP_ABORT("TraceManager::put buffer steps cannot be less than 1");
|
||||||
tolower(format);
|
tolower(format);
|
||||||
if(format=="hdf")
|
if(format=="hdf")
|
||||||
{
|
{
|
||||||
|
@ -2051,34 +2063,43 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//write buffered trace data to file
|
//monitor what step the run is on, and write when it is time
|
||||||
inline void write_buffers(vector<TraceManager*>& clones, int block)
|
//(throttled write, threaded, critical)
|
||||||
|
inline void monitor_writes(int step,TraceManager* master)
|
||||||
{
|
{
|
||||||
if(master_copy)
|
if(step%buffer_steps)
|
||||||
{
|
{
|
||||||
if(writing_traces)
|
#pragma omp critical(TRACE_BUFFER_WRITE)
|
||||||
{
|
{
|
||||||
double tstart = MPI_Wtime();
|
write_buffers(*master);
|
||||||
if(verbose)
|
|
||||||
app_log()<<"TraceManager::write_buffers "<<master_copy<<endl;
|
|
||||||
if(hdf_format)
|
|
||||||
{
|
|
||||||
write_buffers_hdf(clones);
|
|
||||||
}
|
|
||||||
if(adios_format)
|
|
||||||
{
|
|
||||||
#ifdef HAVE_ADIOS
|
|
||||||
write_buffers_adios(clones, block);
|
|
||||||
#else
|
|
||||||
APP_ABORT("TraceManager::write_buffers (adios) ADIOS is not found");
|
|
||||||
#endif
|
|
||||||
//app_log()<<"TraceManager::write_buffers (adios) has not yet been implemented"<<endl;
|
|
||||||
app_log()<<" write_buffers() total time "<<MPI_Wtime()-tstart<<endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
APP_ABORT("TraceManager::write_buffers should not be called from non-master copy");
|
|
||||||
|
|
||||||
|
//write buffered trace data to file
|
||||||
|
inline void write_buffers(TraceManager& master)
|
||||||
|
{
|
||||||
|
if(writing_traces && !buffers_empty())
|
||||||
|
{
|
||||||
|
if(hdf_format)
|
||||||
|
write_buffers_hdf(master);
|
||||||
|
if(adios_format)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_ADIOS
|
||||||
|
write_buffers_adios(master);
|
||||||
|
#else
|
||||||
|
APP_ABORT("TraceManager::write_buffer (adios) ADIOS is not found");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
reset_buffers();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline bool buffers_empty()
|
||||||
|
{
|
||||||
|
return int_buffer.empty() && real_buffer.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -2139,7 +2160,7 @@ public:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline void startRun(int blocks,vector<TraceManager*>& clones)
|
inline void startRun(vector<TraceManager*>& clones)
|
||||||
{
|
{
|
||||||
if(verbose)
|
if(verbose)
|
||||||
app_log()<<"TraceManager::startRun "<<master_copy<<endl;
|
app_log()<<"TraceManager::startRun "<<master_copy<<endl;
|
||||||
|
@ -2148,18 +2169,23 @@ public:
|
||||||
initialize_traces();
|
initialize_traces();
|
||||||
check_clones(clones);
|
check_clones(clones);
|
||||||
open_file(clones);
|
open_file(clones);
|
||||||
|
for(int ip=0; ip<clones.size(); ++ip)
|
||||||
|
clones[ip]->reset_buffers();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
APP_ABORT("TraceManager::startRun should not be called from non-master copy");
|
APP_ABORT("TraceManager::startRun should not be called from non-master copy");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void stopRun()
|
inline void stopRun(vector<TraceManager*>& clones)
|
||||||
{
|
{
|
||||||
if(verbose)
|
if(verbose)
|
||||||
app_log()<<"TraceManager::stopRun "<<master_copy<<endl;
|
app_log()<<"TraceManager::stopRun "<<master_copy<<endl;
|
||||||
if(master_copy)
|
if(master_copy)
|
||||||
{
|
{
|
||||||
|
if(writing_traces)// flush any additional data from the buffers
|
||||||
|
for(int ip=0; ip<clones.size(); ++ip)
|
||||||
|
clones[ip]->write_buffers(*this);
|
||||||
close_file();
|
close_file();
|
||||||
finalize_traces();
|
finalize_traces();
|
||||||
}
|
}
|
||||||
|
@ -2168,21 +2194,6 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void startBlock(int nsteps)
|
|
||||||
{
|
|
||||||
if(verbose)
|
|
||||||
app_log()<<"TraceManager::startBlock "<<master_copy<<endl;
|
|
||||||
reset_buffers();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
inline void stopBlock()
|
|
||||||
{
|
|
||||||
if(verbose)
|
|
||||||
app_log()<<"TraceManager::stopBlock "<<master_copy<<endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
inline void write_summary(string pad=" ")
|
inline void write_summary(string pad=" ")
|
||||||
{
|
{
|
||||||
string pad2 = pad+" ";
|
string pad2 = pad+" ";
|
||||||
|
@ -2252,16 +2263,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
inline void write_buffers_hdf(vector<TraceManager*>& clones)
|
inline void write_buffers_hdf(TraceManager& master)
|
||||||
{
|
{
|
||||||
if(verbose)
|
int_buffer.write_hdf(*master.hdf_file,master.int_buffer.hdf_file_pointer);
|
||||||
app_log()<<"TraceManager::write_buffers_hdf "<<master_copy<<endl;
|
real_buffer.write_hdf(*master.hdf_file,master.real_buffer.hdf_file_pointer);
|
||||||
for(int ip=0; ip<clones.size(); ++ip)
|
|
||||||
{
|
|
||||||
TraceManager& tm = *clones[ip];
|
|
||||||
tm.int_buffer.write_hdf(*hdf_file,int_buffer.hdf_file_pointer);
|
|
||||||
tm.real_buffer.write_hdf(*hdf_file,real_buffer.hdf_file_pointer);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -196,7 +196,7 @@ bool CSVMC::run()
|
||||||
Estimators->start(nBlocks);
|
Estimators->start(nBlocks);
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
CSMovers[ip]->startRun(nBlocks,false);
|
CSMovers[ip]->startRun(nBlocks,false);
|
||||||
Traces->startRun(nBlocks,traceClones);
|
Traces->startRun(traceClones);
|
||||||
const bool has_collectables=W.Collectables.size();
|
const bool has_collectables=W.Collectables.size();
|
||||||
ADIOS_PROFILE::profile_adios_init(nBlocks);
|
ADIOS_PROFILE::profile_adios_init(nBlocks);
|
||||||
for (int block=0; block<nBlocks; ++block)
|
for (int block=0; block<nBlocks; ++block)
|
||||||
|
@ -227,6 +227,7 @@ bool CSVMC::run()
|
||||||
wClones[ip]->saveEnsemble(wit,wit_end);
|
wClones[ip]->saveEnsemble(wit,wit_end);
|
||||||
// if(storeConfigs && (now_loc%storeConfigs == 0))
|
// if(storeConfigs && (now_loc%storeConfigs == 0))
|
||||||
// ForwardWalkingHistory.storeConfigsForForwardWalking(*wClones[ip]);
|
// ForwardWalkingHistory.storeConfigsForForwardWalking(*wClones[ip]);
|
||||||
|
traceClones[ip]->monitor_writes(now_loc,Traces);
|
||||||
}
|
}
|
||||||
CSMovers[ip]->stopBlock(false);
|
CSMovers[ip]->stopBlock(false);
|
||||||
// app_log()<<"THREAD "<<ip<<endl;
|
// app_log()<<"THREAD "<<ip<<endl;
|
||||||
|
@ -236,11 +237,6 @@ bool CSVMC::run()
|
||||||
//Estimators->accumulateCollectables(wClones,nSteps);
|
//Estimators->accumulateCollectables(wClones,nSteps);
|
||||||
CurrentStep+=nSteps;
|
CurrentStep+=nSteps;
|
||||||
Estimators->stopBlock(estimatorClones);
|
Estimators->stopBlock(estimatorClones);
|
||||||
ADIOS_PROFILE::profile_adios_end_comp(block);
|
|
||||||
ADIOS_PROFILE::profile_adios_start_trace(block);
|
|
||||||
Traces->write_buffers(traceClones, block);
|
|
||||||
ADIOS_PROFILE::profile_adios_end_trace(block);
|
|
||||||
ADIOS_PROFILE::profile_adios_start_checkpoint(block);
|
|
||||||
if(storeConfigs)
|
if(storeConfigs)
|
||||||
recordBlock(block);
|
recordBlock(block);
|
||||||
ADIOS_PROFILE::profile_adios_end_checkpoint(block);
|
ADIOS_PROFILE::profile_adios_end_checkpoint(block);
|
||||||
|
@ -249,7 +245,7 @@ bool CSVMC::run()
|
||||||
Estimators->stop(estimatorClones);
|
Estimators->stop(estimatorClones);
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
CSMovers[ip]->stopRun2();
|
CSMovers[ip]->stopRun2();
|
||||||
Traces->stopRun();
|
Traces->stopRun(traceClones);
|
||||||
//copy back the random states
|
//copy back the random states
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
*(RandomNumberControl::Children[ip])=*(Rng[ip]);
|
*(RandomNumberControl::Children[ip])=*(Rng[ip]);
|
||||||
|
|
|
@ -243,7 +243,7 @@ bool DMCOMP::run()
|
||||||
Estimators->start(nBlocks);
|
Estimators->start(nBlocks);
|
||||||
for(int ip=0; ip<NumThreads; ip++)
|
for(int ip=0; ip<NumThreads; ip++)
|
||||||
Movers[ip]->startRun(nBlocks,false);
|
Movers[ip]->startRun(nBlocks,false);
|
||||||
Traces->startRun(nBlocks,traceClones);
|
Traces->startRun(traceClones);
|
||||||
Timer myclock;
|
Timer myclock;
|
||||||
IndexType block = 0;
|
IndexType block = 0;
|
||||||
IndexType updatePeriod=(QMCDriverMode[QMC_UPDATE_MODE])?Period4CheckProperties:(nBlocks+1)*nSteps;
|
IndexType updatePeriod=(QMCDriverMode[QMC_UPDATE_MODE])?Period4CheckProperties:(nBlocks+1)*nSteps;
|
||||||
|
@ -280,6 +280,7 @@ bool DMCOMP::run()
|
||||||
Movers[ip]->setMultiplicity(wit,wit_end);
|
Movers[ip]->setMultiplicity(wit,wit_end);
|
||||||
if(QMCDriverMode[QMC_UPDATE_MODE] && now%updatePeriod == 0)
|
if(QMCDriverMode[QMC_UPDATE_MODE] && now%updatePeriod == 0)
|
||||||
Movers[ip]->updateWalkers(wit, wit_end);
|
Movers[ip]->updateWalkers(wit, wit_end);
|
||||||
|
traceClones[ip]->monitor_writes(now,Traces);
|
||||||
}
|
}
|
||||||
|
|
||||||
prof.pop(); //close dmc_advance
|
prof.pop(); //close dmc_advance
|
||||||
|
@ -304,7 +305,6 @@ bool DMCOMP::run()
|
||||||
}
|
}
|
||||||
// branchEngine->debugFWconfig();
|
// branchEngine->debugFWconfig();
|
||||||
Estimators->stopBlock(acceptRatio());
|
Estimators->stopBlock(acceptRatio());
|
||||||
Traces->write_buffers(traceClones, block);
|
|
||||||
block++;
|
block++;
|
||||||
if(DumpConfig &&block%Period4CheckPoint == 0)
|
if(DumpConfig &&block%Period4CheckPoint == 0)
|
||||||
{
|
{
|
||||||
|
@ -322,7 +322,7 @@ bool DMCOMP::run()
|
||||||
Estimators->stop();
|
Estimators->stop();
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
Movers[ip]->stopRun2();
|
Movers[ip]->stopRun2();
|
||||||
Traces->stopRun();
|
Traces->stopRun(traceClones);
|
||||||
return finalize(nBlocks);
|
return finalize(nBlocks);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -154,7 +154,6 @@ void QMCUpdateBase::stopRun2()
|
||||||
void QMCUpdateBase::startBlock(int steps)
|
void QMCUpdateBase::startBlock(int steps)
|
||||||
{
|
{
|
||||||
Estimators->startBlock(steps);
|
Estimators->startBlock(steps);
|
||||||
Traces->startBlock(steps);
|
|
||||||
nAccept = 0;
|
nAccept = 0;
|
||||||
nReject=0;
|
nReject=0;
|
||||||
nAllRejected=0;
|
nAllRejected=0;
|
||||||
|
@ -165,7 +164,6 @@ void QMCUpdateBase::startBlock(int steps)
|
||||||
void QMCUpdateBase::stopBlock(bool collectall)
|
void QMCUpdateBase::stopBlock(bool collectall)
|
||||||
{
|
{
|
||||||
Estimators->stopBlock(acceptRatio(),collectall);
|
Estimators->stopBlock(acceptRatio(),collectall);
|
||||||
Traces->stopBlock();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void QMCUpdateBase::initWalkers(WalkerIter_t it, WalkerIter_t it_end)
|
void QMCUpdateBase::initWalkers(WalkerIter_t it, WalkerIter_t it_end)
|
||||||
|
|
|
@ -52,7 +52,7 @@ bool RMCSingleOMP::run()
|
||||||
|
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
Movers[ip]->startRun(nBlocks,false);
|
Movers[ip]->startRun(nBlocks,false);
|
||||||
Traces->startRun(nBlocks,traceClones);
|
Traces->startRun(traceClones);
|
||||||
const bool has_collectables=W.Collectables.size();
|
const bool has_collectables=W.Collectables.size();
|
||||||
for (int block=0; block<nBlocks; ++block)
|
for (int block=0; block<nBlocks; ++block)
|
||||||
{
|
{
|
||||||
|
|
|
@ -53,7 +53,7 @@ bool VMCSingleOMP::run()
|
||||||
Estimators->start(nBlocks);
|
Estimators->start(nBlocks);
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
Movers[ip]->startRun(nBlocks,false);
|
Movers[ip]->startRun(nBlocks,false);
|
||||||
Traces->startRun(nBlocks,traceClones);
|
Traces->startRun(traceClones);
|
||||||
const bool has_collectables=W.Collectables.size();
|
const bool has_collectables=W.Collectables.size();
|
||||||
for (int block=0; block<nBlocks; ++block)
|
for (int block=0; block<nBlocks; ++block)
|
||||||
{
|
{
|
||||||
|
@ -82,20 +82,20 @@ bool VMCSingleOMP::run()
|
||||||
wClones[ip]->saveEnsemble(wit,wit_end);
|
wClones[ip]->saveEnsemble(wit,wit_end);
|
||||||
// if(storeConfigs && (now_loc%storeConfigs == 0))
|
// if(storeConfigs && (now_loc%storeConfigs == 0))
|
||||||
// ForwardWalkingHistory.storeConfigsForForwardWalking(*wClones[ip]);
|
// ForwardWalkingHistory.storeConfigsForForwardWalking(*wClones[ip]);
|
||||||
|
traceClones[ip]->monitor_writes(now_loc,Traces);
|
||||||
}
|
}
|
||||||
Movers[ip]->stopBlock(false);
|
Movers[ip]->stopBlock(false);
|
||||||
}//end-of-parallel for
|
}//end-of-parallel for
|
||||||
//Estimators->accumulateCollectables(wClones,nSteps);
|
//Estimators->accumulateCollectables(wClones,nSteps);
|
||||||
CurrentStep+=nSteps;
|
CurrentStep+=nSteps;
|
||||||
Estimators->stopBlock(estimatorClones);
|
Estimators->stopBlock(estimatorClones);
|
||||||
Traces->write_buffers(traceClones, block);
|
|
||||||
if(storeConfigs)
|
if(storeConfigs)
|
||||||
recordBlock(block);
|
recordBlock(block);
|
||||||
}//block
|
}//block
|
||||||
Estimators->stop(estimatorClones);
|
Estimators->stop(estimatorClones);
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
Movers[ip]->stopRun2();
|
Movers[ip]->stopRun2();
|
||||||
Traces->stopRun();
|
Traces->stopRun(traceClones);
|
||||||
//copy back the random states
|
//copy back the random states
|
||||||
for (int ip=0; ip<NumThreads; ++ip)
|
for (int ip=0; ip<NumThreads; ++ip)
|
||||||
*(RandomNumberControl::Children[ip])=*(Rng[ip]);
|
*(RandomNumberControl::Children[ip])=*(Rng[ip]);
|
||||||
|
|
Loading…
Reference in New Issue