qmcpack/nexus/lib/qmcpack_analyzer.py

##################################################################
##  (c) Copyright 2015-  by Jaron T. Krogel                     ##
##################################################################


#====================================================================#
#  qmcpack_analyzer.py                                               #
#    Supports data analysis for QMCPACK output.                      #
#                                                                    #
#  Content summary:                                                  #
#    QmcpackAnalyzer                                                 #
#      SimulationAnalyzer class for QMCPACK.                         #
#                                                                    #
#    QmcpackCapabilities                                             #
#      Class to pair QMCPACK output data with analyzer classes.      #
#                                                                    #
#    QmcpackAnalysisRequest                                          #
#      Offers detailed control over analysis.                        #
#      Serves as a record of requested analysis.                     #
#                                                                    #
#====================================================================#


from time import time

#python standard library imports
import os
import re
import sys
import traceback
from numpy import arange,array
#custom library imports
from generic import obj
from developer import unavailable
from xmlreader import XMLreader
from plotting import *
from physical_system import ghost_atoms
#QmcpackAnalyzer classes imports
from qmcpack_analyzer_base import QAobject,QAanalyzer,QAanalyzerCollection
from qmcpack_property_analyzers \
    import WavefunctionAnalyzer
from qmcpack_quantity_analyzers \
    import ScalarsDatAnalyzer,ScalarsHDFAnalyzer,DmcDatAnalyzer,\
    EnergyDensityAnalyzer,TracesAnalyzer,DensityMatricesAnalyzer,\
    SpinDensityAnalyzer,StructureFactorAnalyzer,DensityAnalyzer
from qmcpack_method_analyzers \
    import OptAnalyzer,VmcAnalyzer,DmcAnalyzer
from qmcpack_result_analyzers \
    import OptimizationAnalyzer,TimestepStudyAnalyzer
from simulation import SimulationAnalyzer,Simulation
from qmcpack_input import QmcpackInput
from debug import *

try:
    import h5py
    h5py_unavailable = False
except:
    h5py = unavailable('h5py')
    h5py_unavailable = True
#end try


class QmcpackAnalyzerCapabilities(QAobject):

    def __init__(self):

        self.methods=set(['opt','vmc','dmc','rmc'])
        self.data_sources = set(['scalar','stat','dmc','storeconfig','opt','traces'])
        self.scalars=set(['localenergy','localpotential','kinetic','elecelec','localecp','nonlocalecp','ionion','localenergy_sq','acceptratio','blockcpu','blockweight','mpc','kecorr'])
        self.fields=set(['energydensity','density','dm1b','spindensity','structurefactor'])

        hdf_data_sources = set(['stat','storeconfig','traces'])
        if h5py_unavailable:
            self.data_sources -= hdf_data_sources
        #end if

        self.analyzer_quantities = set(self.fields)

        self.analyzers = obj(
            scalars_dat     = ScalarsDatAnalyzer,
            scalars_hdf     = ScalarsHDFAnalyzer,
            dmc_dat         = DmcDatAnalyzer,
            traces          = TracesAnalyzer,
            energydensity   = EnergyDensityAnalyzer,
            dm1b            = DensityMatricesAnalyzer,
            spindensity     = SpinDensityAnalyzer,
            structurefactor = StructureFactorAnalyzer,
            density         = DensityAnalyzer
            )

        self.quantities = self.scalars | self.fields

        self.ignorable_estimators=set(['LocalEnergy'])

        self.quantity_aliases=dict()
        for q in self.analyzer_quantities:
            self.quantity_aliases[q]=q
        #end for

        self.future_quantities=set(['StructureFactor','MomentumDistribution'])
        return
    #end def __init__
#end class QmcpackCapabilities


QAanalyzer.capabilities = QmcpackAnalyzerCapabilities()


class QmcpackAnalysisRequest(QAobject):
    def __init__(self,source=None,destination=None,savefile='',
                 methods=None,calculations=None,data_sources=None,quantities=None,
                 warmup_calculations=None,
                 output=set(['averages','samples']),
                 ndmc_blocks=1000,equilibration=None,group_num=None,
                 traces=False,dm_settings=None):
        self.source          = source
        self.destination     = destination
        self.savefile        = str(savefile)
        self.output          = set(output)
        self.ndmc_blocks     = int(ndmc_blocks)
        self.group_num       = group_num
        self.traces          = traces
        self.dm_settings     = dm_settings

        cap = QAanalyzer.capabilities

        if methods is None:
            self.methods = set(cap.methods)
        else:
            self.methods = set(methods) & cap.methods
        #end if
        if calculations is None:
            self.calculations = set()
        else:
            self.calculations = set(calculations)
        #end if
        if data_sources is None:
            self.data_sources = set(cap.data_sources)
        else:
            self.data_sources = set(data_sources) & cap.data_sources
        #end if
        if quantities is None:
            self.quantities   = set(cap.quantities)
        else:
            quants = set()
            for q in quantities:
                qc = self.condense_name(q)
                quants.add(qc)
            #end for
            self.quantities  = quants & cap.quantities
        #end if
        if warmup_calculations is None:
            self.warmup_calculations = set()
        else:
            self.warmup_calculations = set(warmup_calculations)
        #end if
        if isinstance(equilibration,(dict,obj)):
            eq = obj()
            eq.transfer_from(equilibration)
        else:
            eq = equilibration
        #end if
        self.equilibration = eq

        return
    #end def __init__

    def complete(self):
        spath,sfile = os.path.split(self.source)
        if spath=='':
            self.source = os.path.join('./',self.source)
        #end if
        if self.destination==None:
            self.destination = os.path.split(self.source)[0]
        #end if
        return True
    #end def complete
#end class QmcpackAnalysisRequest:


"""
class QmcpackAnalyzer
  used to analyze all data produced by QMCPACK

  usage:
     results = QmcpackAnalyzer("qmcpack.in.xml")
       |  QMC methods used and observables estimated are determined
       \  Each observable is calculated by an object contained in results
"""
class QmcpackAnalyzer(SimulationAnalyzer,QAanalyzer):
    def __init__(self,arg0=None,**kwargs):

        verbose = False
        if 'verbose' in kwargs:
            verbose=kwargs['verbose']
            del kwargs['verbose']
        #end if
        QAanalyzer.verbose_vlog = verbose or QAanalyzer.verbose_vlog

        nindent = 0
        if 'nindent' in kwargs:
            nindent=kwargs['nindent']
            del kwargs['nindent']
        #end if
        QAanalyzer.__init__(self,nindent=nindent)

        analyze = False
        if 'analyze' in kwargs:
            analyze=kwargs['analyze']
            del kwargs['analyze']
        #end if

        if 'ghost_atoms' in kwargs:
            ghosts = kwargs.pop('ghost_atoms')
            ghost_atoms(*ghosts)
        #end if

        if isinstance(arg0,Simulation):
            sim = arg0
            if 'analysis_request' in sim:
                request = sim.analysis_request.copy()
            else:
                request = QmcpackAnalysisRequest(
                    source = os.path.join(sim.resdir,sim.infile),
                    destination = sim.resdir
                    )
                if 'stat' in request.data_sources:
                    request.data_sources.remove('stat')
                #end if
                if 'storeconfig' in request.data_sources:
                    request.data_sources.remove('storeconfig')
                #end if
                if 'traces' in request.data_sources:
                    request.data_sources.remove('traces')
                #end if
            #end if
        elif isinstance(arg0,QmcpackAnalysisRequest):
            request = arg0
        elif isinstance(arg0,str):
            kwargs['source']=arg0
            request = QmcpackAnalysisRequest(**kwargs)
        else:
            if 'source' not in kwargs:
                kwargs['source']='./qmcpack.in.xml'
            #end if
            request = QmcpackAnalysisRequest(**kwargs)
        #end if

        self.change_request(request)

        if request!=None and os.path.exists(request.source):
            self.init_sub_analyzers(request)
        #end if

        savefile = request.savefile
        savefilepath = os.path.join(request.destination,request.savefile)
        self.info.savefile = savefile
        self.info.savefilepath = savefilepath
        self.info.error = None
        if os.path.exists(savefilepath) and savefile!='':
            self.load()
        elif analyze:
            self.analyze()
        #end if

        return
    #end def __init__


    def change_request(self,request):
        if not isinstance(request,QmcpackAnalysisRequest):
            self.error('input request must be a QmcpackAnalysisRequest',exit=False)
            self.error('  type provided: '+str(type(request)))
        #end if
        request.complete()
        self.info.request = request
    #end def change_request


    def init_sub_analyzers(self,request=None):
        own_request = request==None
        if request==None:
            request = self.info.request
        #end if
        group_num = request.group_num

        #determine if the run was bundled
        if request.source.endswith('.xml'):
            self.info.type = 'single'
        else:
            self.info.type = 'bundled'
            self.bundle(request.source)
            return
        #end if

        self.vlog('reading input file: '+request.source,n=1)
        input = QmcpackInput(request.source)
        input.pluralize()
        input.unroll_calculations()
        calculations = input.simulation.calculations
        self.info.set(
            input = input,
            ordered_input = input.read_xml(request.source)
            )

        project,wavefunction = input.get('project','wavefunction')
        wavefunction = wavefunction.get_single('psi0')

        subindent = self.subindent()

        self.wavefunction = WavefunctionAnalyzer(wavefunction,nindent=subindent)

        self.vlog('project id: '+project.id,n=1)
        file_prefix  = project.id
        if group_num!=None:
            group_ext = '.g'+str(group_num).zfill(3)
            if not file_prefix.endswith(group_ext):
                file_prefix += group_ext
            #end if
        elif self.info.type=='single':
            resdir,infile = os.path.split(request.source)
            #ifprefix = infile.replace('.xml','')
            ifprefix = infile.replace('.xml','.')
            ls = os.listdir(resdir)
            for filename in ls:
                if filename.startswith(ifprefix) and filename.endswith('.qmc'):
                    group_tag = filename.split('.')[-2]
                    #file_prefix = 'qmc.'+group_tag
                    file_prefix = project.id+'.'+group_tag
                    break
                #end if
            #end for
        #end if
        if 'series' in project:
            series_start = int(project.series)
        else:
            series_start = 0
        #end if

        self.vlog('data file prefix: '+file_prefix,n=1)

        run_info = obj(
            file_prefix  = file_prefix,
            series_start = series_start,
            source_path  = os.path.split(request.source)[0],
            group_num    = group_num,
            system       = input.return_system()
            )
        self.info.transfer_from(run_info)

        self.set_global_info()

        if len(request.calculations)==0:
            request.calculations = set(series_start+arange(len(calculations)))
        #end if

        method_aliases = dict()
        for method in self.opt_methods:
            method_aliases[method]='opt'
        #end for
        for method in self.vmc_methods:
            method_aliases[method]='vmc'
        #end for
        for method in self.dmc_methods:
            method_aliases[method]='dmc'
        #end for

        method_objs = ['qmc','opt','vmc','dmc']
        for method in method_objs:
            self[method] = QAanalyzerCollection()
        #end for
        for index,calc in calculations.items():
            method = calc.method
            if method in method_aliases:
                method_type = method_aliases[method]
            else:
                self.error('method '+method+' is unrecognized')
            #end if
            if method_type in request.methods:
                series = series_start + index
                if series in request.calculations:
                    if method in self.opt_methods:
                        qma = OptAnalyzer(series,calc,input,nindent=subindent)
                        primary = self.opt
                    elif method in self.vmc_methods:
                        qma = VmcAnalyzer(series,calc,input,nindent=subindent)
                        primary = self.vmc
                    elif method in self.dmc_methods:
                        qma = DmcAnalyzer(series,calc,input,nindent=subindent)
                        primary = self.dmc
                    #end if
                    primary[series]  = qma
                    self.qmc[series] = qma
                #end if
            #end if
        #end for
        for method in method_objs:
            if len(self[method])==0:
                del self[method]
            #end if
        #end for

        #Check for multi-qmc results such as
        # optimization or timestep studies
        results = QAanalyzerCollection()
        if 'opt' in self and len(self.opt)>0:
            optres = OptimizationAnalyzer(input,self.opt,nindent=subindent)
            results.optimization = optres
        #end if
        if 'dmc' in self and len(self.dmc)>1:
            maxtime = 0
            times = dict()
            for series,dmc in self.dmc.items():
                blocks,steps,timestep = dmc.info.method_input.list('blocks','steps','timestep')
                times[series] = blocks*steps*timestep
                maxtime = max(times[series],maxtime)
            #end for
            dmc = QAanalyzerCollection()
            for series,time in times.items():
                if abs(time-maxtime)/maxtime<.5:
                    dmc[series] = self.dmc[series]
                #end if
            #end for
            if len(dmc)>1:
                results.timestep_study = TimestepStudyAnalyzer(dmc,nindent=subindent)
            #end if
        #end if

        if len(results)>0:
            self.results = results
        #end if

        self.unset_global_info()

    #end def init_sub_analyzers


    def set_global_info(self):
        QAanalyzer.request  = self.info.request
        QAanalyzer.run_info = self.info
    #end def set_global_info

    def unset_global_info(self):
        QAanalyzer.request  = None
        QAanalyzer.run_info = None
    #end def unset_global_info


    def load_data(self):
        request = self.info.request
        if not os.path.exists(request.source):
            self.error('path to source\n  '+request.source+'\n  does not exist\n ensure that request.source points to a valid qmcpack input file')
        #end if
        self.set_global_info()
        self.propagate_indicators(data_loaded=False)
        if self.info.type=='bundled' and self.info.perform_bundle_average:
            self.prevent_average_load()
        #end if
        QAanalyzer.load_data(self)
        if self.info.type=='bundled' and self.info.perform_bundle_average:
            self.average_bundle_data()
        #end if
        self.unset_global_info()
    #end def load_data


    def analyze(self,force=False):
        if not self.info.analyzed or force:
            if not self.info.data_loaded:
                self.load_data()
            #end if
            self.vlog('main analysis of QmcpackAnalyzer data',n=1)
            try:
                self.set_global_info()
                self.propagate_indicators(analyzed=False)
                if self.info.type!='bundled':
                    QAanalyzer.analyze(self,force=force)
                else:
                    for analyzer in self.bundled_analyzers:
                        analyzer.analyze()
                    #end for
                    QAanalyzer.analyze(self,force=force)
                #end if
                self.unset_global_info()
            except:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
                msg = ''
                for line in lines:
                    msg+=line
                #end for
                self.info.error = exc_type
                self.warn('runtime exception encountered\n'+msg)
            #end try
            self.vlog('end main analysis of QmcpackAnalyzer data',n=1)
            if self.info.request.savefile!='':
                self.save()
            #end if
        #end if
    #end def analyze


    def bundle(self,source):
        self.vlog('bundled run detected',n=1)
        if os.path.exists(source):
            fobj = open(source,'r')
            lines = fobj.read().split('\n')
            fobj.close()
        else:
            self.error('source file '+source+' does not exist')
        #end if
        infiles = []
        for line in lines:
            ls = line.strip()
            if ls!='':
                infiles.append(ls)
            #end if
        #end for
        self.info.input_infiles = list(infiles)
        analyzers = QAanalyzerCollection()
        request = self.info.request
        path = os.path.split(request.source)[0]
        files = os.listdir(path)
        outfiles = []
        for file in files:
            if file.endswith('qmc'):
                outfiles.append(file)
            #end if
        #end for
        del files
        for i in range(len(infiles)):
            infile = infiles[i]
            prefix = infile.replace('.xml','')
            gn = i
            for outfile in outfiles:
                if outfile.startswith(prefix):
                    gn = int(outfile.split('.')[-2][1:])
                    break
                #end if
            #end for
            req = request.copy()
            req.source = os.path.join(path,infile)
            req.group_num = gn
            qa = QmcpackAnalyzer(req,nindent=self.subindent())
            #qa.init_sub_analyzers(group_num=gn)
            analyzers[gn] = qa
        #end for
        self.bundled_analyzers = analyzers
        self.info.perform_bundle_average = False
        #check to see if twist averaging
        #  indicated by distinct twistnums
        #  or twist in all ids
        twistnums = set()
        twist_ids = True
        for analyzer in analyzers:
            input = analyzer.info.input
            twistnum = input.get('twistnum')
            project = input.get('project')
            if twistnum!=None:
                twistnums.add(twistnum)
            #end if
            twist_ids = twist_ids and 'twist' in project.id
        #end for
        distinct_twistnums = len(twistnums)==len(analyzers)
        twist_averaging = distinct_twistnums or twist_ids
        if twist_averaging:
            self.info.perform_bundle_average = True
        #end if
        example = analyzers.list()[0]
        input,system = example.info.tuple('input','system')
        self.info.set(
            input  = input.copy(),
            system = system.copy()
            )
        self.vlog('average over bundled runs?  {0}'.format(self.info.perform_bundle_average),n=1)
    #end def bundle


    def prevent_average_load(self):
        for method_type in self.capabilities.methods:
            if method_type in self:
                self[method_type].propagate_indicators(data_loaded=True)
            #end if
        #end for
    #end def prevent_average_load


    def average_bundle_data(self):
        analyzers = self.bundled_analyzers
        if len(analyzers)>0:
            self.vlog('performing bundle (e.g. twist) averaging',n=1)
            #create local data structures to match those in the bundle
            example = analyzers.list()[0].copy()
            for method_type in self.capabilities.methods:
                if method_type in self:
                    del self[method_type]
                #end if
                if method_type in example:
                    self.vlog('copying {0} methods from analyzer 0'.format(method_type),n=2)
                    self[method_type] = example[method_type]
                #end if
            #end if
            if 'qmc' in self:
                del self.qmc
            #end if
            if 'qmc' in example:
                self.vlog('copying qmc methods from analyzer 0',n=2)
                self.qmc = example.qmc
            #end if
            if 'wavefunction' in self:
                del self.wavefunction
            #end if
            if 'wavefunction' in example:
                self.vlog('copying wavefunction from analyzer 0',n=2)
                self.wavefunction = example.wavefunction
            #end if
            del example

            if 'qmc' in self:
                #zero out the average data
                self.vlog('zeroing own qmc data',n=2)
                for qmc in self.qmc:
                    qmc.zero_data()
                #end for

                #resize the average data
                self.vlog('finding minimum data size (for incomplete runs)',n=2)
                for analyzer in analyzers:
                    for series,qmc in self.qmc.items():
                        qmc.minsize_data(analyzer.qmc[series])
                    #end for
                #end for

                #accumulate the average data
                self.vlog('accumulating data from bundled runs',n=2)
                for analyzer in analyzers:
                    for series,qmc in self.qmc.items():
                        qmc.accumulate_data(analyzer.qmc[series])
                    #end for
                #end for

                #normalize the average data
                norm_factor = len(analyzers)
                self.vlog('normalizing bundle average (factor={0})'.format(norm_factor),n=2)
                for qmc in self.qmc:
                    qmc.normalize_data(norm_factor)
                #end for
            #end if
        #end if
    #end def average_bundle_data


    def save(self,filepath=None,overwrite=True):
        if filepath==None:
            filepath = self.info.savefilepath
        #end if
        self.vlog('saving QmcpackAnalyzer in file {0}'.format(filepath),n=1)
        if not overwrite and os.path.exists(filepath):
            return
        #end if
        self._unlink_dynamic_methods()
        self.saved_global = QAobject._global
        self._save(filepath)
        self._relink_dynamic_methods()
        return
    #end def save

    def load(self,filepath=None):
        if filepath==None:
            filepath = self.info.savefilepath
        #end if
        self.vlog('loading QmcpackAnalyzer from file {0}'.format(filepath),n=1)
        self._load(filepath)
        QAobject._global = self.saved_global
        del self.saved_global
        self._relink_dynamic_methods()
        return
    #end def load


    def check_traces(self,verbose=False,pad=None,header=None):
        if pad is None:
            pad = ''
        #end if
        if header is None:
            header = '\nChecking traces'
        #end if
        if 'qmc' in self:
            if verbose:
                self.log(pad+header)
                pad += '  '
            #end if
            for method in self.qmc:
                method.check_traces(pad)
            #end for
        else:
            if verbose:
                self.log(pad+'\nNo traces to check')
            #end if
            return None
        #end if
    #end def check_traces


    def plot_trace(self,quantity,style='b-',offset=0,source='scalar',mlabels=True,
                   mlines=True,show=True,alloff=False):
        mlabels &= not alloff
        mlines  &= not alloff
        show    &= not alloff
        shw = show
        offset = int(offset)
        id = self.info.input.get('project').id
        sdata = obj()
        series = sorted(self.qmc.keys())
        q = []
        soffset = offset
        for s in series:
            qmc = self.qmc[s]
            method = qmc.info.method
            if source=='scalar' or method=='vmc':
                src = qmc.scalars.data
            elif source=='dmc':
                src = qmc.dmc.data
            else:
                self.error('invalid source: '+source)
            #end if
            if quantity in src:
                qn = list(src[quantity])
            else:
                qn = len(src.LocalEnergy)*[0]
            #end if
            q.extend(qn)
            sdata[s] = obj(
                mlab = method+' '+str(s),
                mloc = soffset + len(qn)//2,
                line_loc = soffset + len(qn)-1
                )
            soffset += len(qn)
        #end for
        q = array(q)
        qmin = q.min()
        qmax = q.max()
        mlabel_height = qmin + .8*(qmax-qmin)
        if shw:
            figure()
        #end if
        plot(offset+arange(len(q)),q,style,label=id)
        for s in series:
            sd = sdata[s]
            if mlabels:
                text(sd.mloc,mlabel_height,sd.mlab)
            #end if
            if mlines:
                plot([sd.line_loc,sd.line_loc],[qmin,qmax],'k-')
            #end if
        #end for
        if shw:
            title('{0} vs series for {1}'.format(quantity,id))
            xlabel('blocks')
            ylabel(quantity)
            legend()
            show()
        #end if
    #end def plot_trace

#end class QmcpackAnalyzer