#! /usr/bin/env python3 import os import sys from optparse import OptionParser def find_nexus_modules(): import sys nexus_lib = os.path.abspath(os.path.join(__file__,'..','..','lib')) assert(os.path.exists(nexus_lib)) sys.path.append(nexus_lib) #end def find_nexus_modules def import_nexus_module(module_name): import inspect import importlib return importlib.import_module(module_name) #end def import_nexus_module # Load Nexus modules try: # Attempt specialized path-based imports. # (The executable should still work even if Nexus is not installed) find_nexus_modules() versions = import_nexus_module('versions') nexus_version = versions.nexus_version del versions generic = import_nexus_module('generic') obj = generic.obj del generic developer = import_nexus_module('developer') DevBase = developer.DevBase unavailable = developer.unavailable del developer unit_converter = import_nexus_module('unit_converter') convert = unit_converter.convert del unit_converter numerics = import_nexus_module('numerics') simstats = numerics.simstats simplestats = numerics.simplestats equilibration_length = numerics.equilibration_length del numerics except: # Failing path-based imports, import installed Nexus modules. from versions import nexus_version from generic import obj from developer import DevBase,unavailable from unit_converter import convert from numerics import simstats,simplestats,equilibration_length #end try # numpy imports try: import numpy as np except (ImportError,RuntimeError): np = unavailable('numpy') #end try # matplotlib imports matplotlib_import_success = False try: import matplotlib.pyplot as plt params = {'legend.fontsize':14,'figure.facecolor':'white','figure.subplot.hspace':0., 'axes.labelsize':16,'xtick.labelsize':14,'ytick.labelsize':14} plt.rcParams.update(params) matplotlib_import_success = True except (ImportError,RuntimeError): matplotlib_import_success = False #end try def sigfig_round(some_float, nsigfig): if nsigfig >= 1 and nsigfig == int(nsigfig): return round(some_float, nsigfig - int(np.floor(np.log10(abs(some_float))))-1) else: QMCA.class_error("Number of significant figures must be a whole number\nreceived {0}".format(nsigfig)) #end if #end sigfig_round class ColorWheel(DevBase): def __init__(self): colors = 'Black Maroon DarkOrange Green DarkBlue Purple Gray Firebrick Orange MediumSeaGreen DodgerBlue MediumOrchid'.split() lines = '- -- -. :'.split() markers = '. v s o ^ d p'.split() ls = [] for line in lines: for color in colors: ls.append((color,line)) #end for #end for ms = [] for i in range(len(markers)): ms.append((colors[i],markers[i])) #end for mls = [] ic=-1 for line in lines: for marker in markers: ic = (ic+1)%len(colors) mls.append((colors[ic],marker+line)) #end for #end for self.line_styles = ls self.marker_styles = ms self.marker_line_styles = mls self.reset() #end def __init__ def next_line(self): self.iline = (self.iline+1)%len(self.line_styles) return self.line_styles[self.iline] #end def next_line def next_marker(self): self.imarker = (self.imarker+1)%len(self.marker_styles) return self.marker_styles[self.imarker] #end def next_marker def next_marker_line(self): self.imarker_line = (self.imarker_line+1)%len(self.marker_line_styles) return self.marker_line_styles[self.imarker_line] #end def next_marker_line def reset(self): self.iline = -1 self.imarker = -1 self.imarker_line = -1 #end def reset def reset_line(self): self.iline = -1 #end def reset_line def reset_marker(self): self.imarker = -1 #end def reset_marker def reset_marker_line(self): self.imarker_line = -1 #end def reset_marker_line #end class ColorWheel color_wheel = ColorWheel() class QBase(DevBase): options = obj() autocorr = True quantity_abbreviations = obj( e = 'LocalEnergy', v = 'Variance', k = 'Kinetic', p = 'LocalPotential', ee = 'ElecElec', ii = 'IonIon', l = 'LocalECP', n = 'NonLocalECP', ce = 'CorrectedEnergy', mpc = 'MPC', kc = 'KEcorr', bw = 'BlockWeight', bc = 'BlockCPU', ar = 'AcceptRatio', eff = 'Efficiency', te = 'TrialEnergy', de = 'DiffEff', w = 'Weight', nw = 'NumOfWalkers', sw = 'AvgSentWalkers', tt = 'TotalTime', ts = 'TotalSamples', le2= 'LocalEnergy_sq', fl = 'Flux', # RMC k_m = 'Kinetic_m', k_p = 'Kinetic_p', p_p = 'LocalPotential_pure', ee_m = 'ElecElec_m', ee_p = 'ElecElec_p', # CSVMC e_A = 'LocEne_0', e_B = 'LocEne_1', de_AB = 'dLocEne_0_1', k_A = 'Kinetic_0', k_B = 'Kinetic_1', k_AB = 'dKinetic_0_1', p_A = 'LocPot_0', p_B = 'LocPot_1', p_AB = 'dLocPot_0_1', ee_A = 'ElecElec_0', ee_B = 'ElecElec_1', dee_AB = 'dElecElec_0_1', ei_A = 'ElecIon_0', ei_B = 'ElecIon_1', ei_AB = 'dElecIon_0_1', ii_A = 'IonIon_0', ii_B = 'IonIon_1', dii_AB = 'dIonIon_0_1', fl_A = 'Flux_0', fl_B = 'Flux_1', fl_AB = 'dFlux_0_1', wp_A = 'wpsi_0', wp_B = 'wpsi_1', # AFQMC el = 'EnergyEstim__nume_real' ) def log(self,*texts,**kwargs): if len(kwargs)>0: n = kwargs['n'] else: n=0 #end if text='' for t in texts: text+=str(t)+' ' #end for pad = n*' ' self._logfile.write(pad+text.replace('\n','\n'+pad)+'\n') #end def log def check_matplotlib_import(self): if not self.options.nowarn and not matplotlib_import_success: self.warn('Python library matplotlib raised uncaught exceptions during import. Plots may or may not work.') #end if #end def check_matplotlib_import #end class QBase class DatAnalyzer(QBase): first = 'LocalEnergy Variance Kinetic LocalPotential ElecElec LocalECP NonLocalECP IonIon'.split() last = 'MPC KEcorr BlockWeight BlockCPU AcceptRatio Efficiency TotalTime TotalSamples TrialEnergy DiffEff Weight NumOfWalkers LivingFraction AvgSentWalkers CorrectedEnergy'.split() nonenergy = set('BlockWeight BlockCPU AcceptRatio Efficiency TotalTime TotalSamples DiffEff Weight NumOfWalkers LivingFraction AvgSentWalkers'.split()) whole_numbers = ['TotalSamples', 'NumOfWalkers'] energy_sq = set(['Variance','LocalEnergy_sq']) non_ee_potentials = ['IonIon','LocalECP','NonLocalECP','ElecIon','IonElec'] def __init__(self,filepath,index,type='unknown',units='Ha'): self.info = obj( index = index, type = type, units = units, series = None, filepath = None, filename = None, prefix = None, nsamples = None, weight = 1.0, load_failed = False, ) self.data = obj() self.stats = obj() self.load_data(filepath) #end def __init__ def extract_info(self,filepath): series = None path,filename = os.path.split(filepath) tokens = filename.split('.') for token in tokens: if len(token)==4 and token[0]=='s' and token[1:].isdigit(): series=int(token[1:]) break #end if #end for if series is None: self.error('unable to read series number for file '+filepath,'QMCA') #end if prefix = filepath.split('.s'+str(series).zfill(3))[0] self.info.set( series = series, filepath = filepath, filename = filename, prefix = prefix ) #end def extract_info def get_equilibration(self): eq = self.options.equilibration if eq=='auto': if 'LocalEnergy' in self.data: nbe = equilibration_length(self.data.LocalEnergy) else: nbe = 0 #end if elif isinstance(eq,(int,np.int_)): nbe = eq elif not self.info.series in eq: self.error('equilibration length was not provided for series {0}\nplease use the -e option to provide an equilibration length'.format(self.info.series),'QMCA') else: nbe = eq[self.info.series] #end if if nbe<0: self.error('equilibration length cannot be negative\nyou requested an equilibration length of {0} for series {1}'.format(nbe,self.info.series),'QMCA') #end if return nbe #end def get_equilibration def load_data(self,filepath): self.data.clear() self.extract_info(filepath) lt = np.loadtxt(filepath) if len(lt.shape)==1: lt.shape = (1,len(lt)) #end if if lt.size==0: self.info.load_failed = True return #end if data = lt[:,1:].transpose() self.info.nsamples = data.shape[1] fobj = open(filepath,'r') variables = fobj.readline().split()[2:] # fobj.close() Econv = convert(1.0,'Ha',self.info.units) Econv2 = Econv**2 for i in range(len(variables)): var = variables[i] if var in self.nonenergy: self.data[var]=data[i,:] elif var in self.energy_sq: self.data[var]=Econv2*data[i,:] else: self.data[var]=Econv*data[i,:] #end if #end for #end def load_data def stat_value(self,v): if self.autocorr: (mean,var,error,kappa)=simstats(v) else: (mean,var,error,kappa)=simplestats(v,full=True) #end if sv = obj( mean = mean, var = var, error = error, kappa = kappa ) return sv #end def stat_value def analyze(self): self.stats.clear() nbe = self.get_equilibration() data = self.data stats = self.stats for varname,samples in data.items(): if nbe>=len(samples): self.error('equilibration length for series {0} is greater than the amount of data in the file ({1} elements)\nyou requested an equilibration length of {2}'.format(self.info.series,len(samples),nbe),'QMCA') #end if stats[varname] = self.stat_value(samples[nbe:]) #end for if 'LocalEnergy_sq' in data and 'LocalEnergy' in data: v = data.LocalEnergy_sq - data.LocalEnergy**2 data.Variance = v stats.Variance = self.stat_value(v[nbe:]) #end if # Find finite size corrected energy if 'LocalEnergy' in data and ('MPC' in data or 'KEcorr' in data): le_recovered = False # Attempt to make a correction that is right regardless of whether # ElecElec/MPC are present or "physical" in QMCPACK's parlance. if 'Kinetic' in data: # Attempt to reconstruct the total energy w/o e-e contribution non_ee_energies = self.non_ee_potentials nee = data.Kinetic.copy() for pot in self.non_ee_potentials: if pot in data: nee += data[pot] #end if #end for # Allow the correction only if the local energy sum is correct. # This might not be possible if the non-e-e potential energy terms # listed in non_ee_potentials are not comprehensive for whatever # reason (e.g. the user included a new potential of their own making # that qmca is not aware of). le_ref = data.LocalEnergy.sum() for ee in ('ElecElec','MPC'): if ee in data: le_sum = (nee + data[ee]).sum() le_recovered |= abs((le_sum-le_ref)/le_ref)<1e-8 #end if #end if # Provided the local energy can be recovered, the corrected # local energy is always correct if MPC is used when present. if le_recovered: ce = nee if 'MPC' in data: ce += data.MPC elif 'ElecElec' in data: ce += data.ElecElec #end if #end if #end if # Fall back to using LocalEnergy alone in case attempt above is # flawed due to imperfect knowledge. This case represents an # assumption that MPC is used only as a post-correction. if not le_recovered: ce = data.LocalEnergy.copy() if 'MPC' in data and 'ElecElec' in data: ce += data.MPC - data.ElecElec #end if #end if # Add in the kinetic energy correction, if present. if 'KEcorr' in data: ce += data.KEcorr #end if data.CorrectedEnergy = ce stats.CorrectedEnergy = self.stat_value(ce[nbe:]) #end if if len(data)>0: example = data.first() if 'NumOfWalkers' in data: ts = data.NumOfWalkers.sum() stats.TotalSamples = obj(mean=ts,var=0.0,error=0.0,kappa=1.0) data.TotalSamples = 0*example #end if if 'BlockWeight' in data: bw = data.BlockWeight[nbe:] ts = bw.sum() stats.TotalSamples = obj(mean=ts,var=0.0,error=0.0,kappa=1.0) data.TotalSamples = 0*example #end if if 'BlockCPU' in data: bc = data.BlockCPU[nbe:] tt = bc.sum() stats.TotalTime = obj(mean=tt,var=0.0,error=0.0,kappa=1.0) data.TotalTime = 0*example #end if if 'LocalEnergy' in data and 'BlockWeight' in data and 'BlockCPU' in data: e = stats.LocalEnergy.error w = stats.BlockWeight.mean t = stats.BlockCPU.mean wt = (bc*bw).sum()/3600 # def. of efficiency in energy.pl #stats.Efficiency = obj(mean=w/t,var=0.0,error=0.0,kappa=1.0) # efficiency based on total work and error bar if (e > 0.0): stats.Efficiency = obj(mean=1.0/(e**2*wt),var=0.0,error=0.0,kappa=1.0) else: stats.Efficiency = obj(mean=0.0,var=0.0,error=0.0,kappa=1.0) #end if data.Efficiency = 0*example #end if #end if #end def analyze def zero(self): self.info.weight = 0.0 for d in self.data: d[:] = 0.0 #end for for s in self.stats: s.set( mean = 0.0, error = 0.0, kappa = 0.0, var = 0.0 ) #end for #end def zero def accumulate(self,other,weight): self.info.weight += weight for q,v in other.data.items(): d = self.data[q] minlen = min(len(d),len(v)) self.data[q] = d[0:minlen] + weight*v[0:minlen] #end for #end def accumulate def normalize(self): w = self.info.weight for d in self.data: d[:] /= w #end for #end def normalize def join(self,others): eq = self.options.equilibration quantities = self.data.keys() for q in quantities: qlist = [self.data[q]] for other in others: if eq=='auto': nbe = 0 else: nbe = other.get_equilibration() #end if qlist.append(other.data[q][nbe:]) #end for self.data[q] = np.concatenate(qlist) #end for #end def join def write_stats(self,quantities,first_prefix=False,first_series=False,only_series=False): opt = self.options stats = self.stats prefix = self.info.prefix series = self.info.series qset = set(quantities) evset = set(['LocalEnergy','Variance']) cvset = set(['CorrectedEnergy','Variance']) if first_series and not only_series: self.log('') #end if if opt.desired_error or opt.particle_number: ratio = 1.0 current_error = sigfig_round(stats.LocalEnergy.error, 1) if opt.desired_error: ratio = stats.LocalEnergy.error / opt.desired_error phrase = "{:<50} {} {}".format( "To change the current error of {} {} to ".format( current_error, opt.units ), opt.desired_error, opt.units ) self.log("\n{:<50}".format(phrase)) #end if if opt.particle_number: ratio *= opt.particle_number[0]/opt.particle_number[1] phrase = "{:<50} {}".format( "To increase current particle number of {} to".format( opt.particle_number[1] ), opt.particle_number[0] ) if opt.desired_error: self.log("{:<50}".format(phrase)) else: self.log("\n{:<50}".format(phrase)) phrase = "{:<50} {} {}".format( "Maintaining current error bar of ", current_error, opt.units ) self.log("{:<50}".format(phrase)) #end if #end if required_samples = int((ratio ** 2) * stats.TotalSamples.mean) required_samples = int(sigfig_round(required_samples, 3)) factor = required_samples / stats.TotalSamples.mean factor = sigfig_round(factor, 3) self.log("{:<50} {:,}".format( "Change sample number to", required_samples ) ) self.log("{:<50} {}\n".format( "Multiplying current sample number by a factor of", factor ) ) exit() #end if if len(quantities)==1 and not opt.all_quantities: line = self.stat_line(quantities[0],prec='8.6f') self.log('{0} series {1} {2}'.format(prefix,series,line)) elif qset==evset: if first_prefix and first_series: self.log(' LocalEnergy Variance ratio') #end if line = self.EV_line(['LocalEnergy','Variance']) self.log('{0} series {1} {2}'.format(prefix,series,line)) elif qset==cvset: if first_prefix and first_series: self.log(' CorrectedEnergy Variance ratio') #end if line = self.EV_line(['CorrectedEnergy','Variance']) self.log('{0} series {1} {2}'.format(prefix,series,line)) else: if opt.all_quantities: squants = set(stats.keys()) else: squants = set(quantities) #end if first = [] for qn in self.first: if qn in squants: first.append(qn) #end if #end for last = [] for qn in self.last: if qn in squants: last.append(qn) #end if #end for mid = sorted(list(squants-set(first)-set(last))) order = first+mid+last self.log('\n{0} series {1}'.format(prefix,series)) for qname in order: if qname in stats: if qname in self.whole_numbers: line = self.stat_line(qname,prec='16.0f') else: line = self.stat_line(qname) #end if if qname=='CorrectedEnergy': self.log(len(line)*'-',n=1) #end if self.log(line,n=1) #end if #end for #end if #end def write_stats def stat_line(self,qname,compress=False,prec='auto'): opt = self.options q = self.stats[qname] if opt.precision is not None: prec = opt.precision line = '{0:<22}= {1: '+prec+'} +/- {2: '+prec+'}' elif prec=='auto': if (q.mean !=0.0 and abs(q.error/q.mean) > 1e-8): d = int(max(2,1-np.floor(np.log(q.error)/np.log(10.)))) else: d = 2 #end if d = str(d) line = '{0:<22}= {1:16.'+d+'f} +/- {2:16.'+d+'f}' else: line = '{0:<22}= {1:'+prec+'} +/- {2:'+prec+'}' #end if vals = [qname,q.mean,q.error] opt = self.options if opt.show_variance: line += ' {'+str(len(vals))+':4.3e}' vals.append(q.var) #end if if opt.show_autocorr: line += ' {'+str(len(vals))+':>4.1f}' vals.append(q.kappa) #end if line = line.format(*vals) if compress: tokens = line.split() line='' for token in tokens: line+=token+' ' #end for #end if return line #end def stat_line def EV_line(self,quants): line = '' for qname in quants: qline = self.stat_line(qname,prec='8.6f') line += qline.split('=')[1].strip()+' ' #end for E = self.stats[quants[0]] V = self.stats[quants[1]] line += '{0:6.4f}'.format(abs(V.mean/E.mean)) return line #end def EV_line def plot_series(self,quantity,style,prefix): if not quantity in self.stats: self.error('quantity '+quantity+' is not present in the data for {0}'.format(self.info.filepath),'QMCA') #end if s = self.info.series q = self.stats[quantity] #errorbar(s,q.mean,q.error,fmt=style,label=prefix) return (s,s),(q.mean,q.mean),s,q.mean,q.error #end def plot_series def plot_trace(self,quantity,style,prefix,shift): self.check_matplotlib_import() if not quantity in self.data: self.error('quantity '+quantity+' is not present in the data for {0}'.format(self.info.filepath),'QMCA') #end if nbe = self.get_equilibration() q = self.data[quantity] middle = int(len(q)//2) qmean = q[middle:].mean() qmax = q[middle:].max() qmin = q[middle:].min() ylims = (qmean-2*(qmean-qmin),qmean+2*(qmax-qmean)) smean,svar = self.stats[quantity].tuple('mean','var') sstd = np.sqrt(svar) s = np.arange(len(q))+shift xlims = min(s),max(s) #plot(s,q,style,label=prefix) plt.plot([nbe+shift,nbe+shift],ylims,'k-.',lw=2) plt.plot(xlims,[smean,smean],'r-') plt.plot(xlims,[smean+sstd,smean+sstd],'r-.') plt.plot(xlims,[smean-sstd,smean-sstd],'r-.') return xlims,ylims,s,q #end def plot_trace def plot_histogram(self,quantity,style,prefix): self.not_implemented() #if not quantity in self.data: # self.error('quantity '+quantity+' is not present in the data for {0}'.format(self.info.filepath),'QMCA') ##end if #nbe = self.get_equilibration() #q = self.data[quantity] #middle = int(len(q)/2) #qmean = q[middle:].mean() #qmax = q[middle:].max() #qmin = q[middle:].min() #ylims = (qmean-2*(qmean-qmin),qmean+2*(qmax-qmean)) #smean,svar = self.stats[quantity].tuple('mean','var') #sstd = np.sqrt(svar) #s = np.arange(len(q))+shift #xlims = min(s),max(s) ##plot(s,q,style,label=prefix) #plot([nbe+shift,nbe+shift],ylims,'k-.',lw=2) #plot(xlims,[smean,smean],'r-') #plot(xlims,[smean+sstd,smean+sstd],'r-.') #plot(xlims,[smean-sstd,smean-sstd],'r-.') #ylims = qmin,qmax #return xlims,ylims,s,q #end def plot_histogram #end class DatAnalyzer def comma_list(s): if ',' in s: s = s.replace(',',' ') #end if return s.split() #end def comma_list class QMCA(QBase): def __init__(self): self.parser = None self.file_list = [] self.file_map = obj() self.prefix_list = [] self.data = obj() self.verbose = False self.read_command_line() self.load_data() self.analyze_data() self.exit() #end def __init__ def help(self): self.log('\n'+self.parser.format_help().strip(),n=1) self.log('\nAbbreviations and full names for quantities:\n'+str(self.quantity_abbreviations),n=1) self.exit() #end def help def examples(self): examples = ''' QMCA examples: ============= all quantities single file qmca qmc.s000.scalar.dat qmca qmc.s000.dmc.dat single quantity (local energy) single file qmca -q e qmc.s000.scalar.dat qmca -q e qmc.s000.dmc.dat single quantity (local energy) multiple files multiple series qmca -q e qmc.s*.scalar.dat multiple prefixes qmca -q e *.s000.scalar.dat multiple series and prefixes qmca -q e *.s*.scalar.dat multiple quantities, multiple files qmca -q ekp *.s*.scalar.dat special format for energy, variance, & ratio qmca -q ev qmc.s*.scalar.dat setting equilibration lengths equilibration length of 10 applied to all files qmca -e 10 qmc.s000.scalar.dat qmc.s001.scalar.dat equilibration lengths of 10 and 20 applied series 0 and 1 qmca -e '10 20' qmc.s000.scalar.dat qmc.s001.scalar.dat twist averaging equal weights qmca -a qmc.g000.s*.scalar.dat qmc.g001.s*.scalar.dat user specified weights (w=2 for g=0 and w=4 for g=1) qmca -a -w '2 4' qmc.g000.s*.scalar.dat qmc.g001.s*.scalar.dat save averaged data to scalar files (useful for timestep extrapolating averaged quantities) qmca -a -w '2 4' --save_average qmc.g000.s*.scalar.dat qmc.g001.s*.scalar.dat plotting vs series qmca -p -q e -e 10 qmc.s*.scalar.dat plotting vs samples (sample traces) qmca -t -q e -e 10 qmc.s*.scalar.dat plotting multiple twists on the same plot qmca -po -q e -e 10 qmc.g*.s*.scalar.dat qmca -to -q e -e 10 qmc.g*.s*.scalar.dat plotting twist averages qmca -pa -q e -e 10 qmc.g*.s*.scalar.dat qmca -ta -q e -e 10 qmc.g*.s*.scalar.dat multiple quantities and prefixes can be used for plots as well ''' self.log(examples) self.exit() #end def examples def exit(self): if self.verbose: self.log('\nQMCA finished\n') #end if exit() #end def exit def read_command_line(self): usage = '''usage: %prog [options] [file(s)]''' parser = OptionParser(usage=usage,add_help_option=False,version='%prog {}.{}.{}'.format(*nexus_version)) self.parser = parser # available option letters: c, f, g, k, l, m, y, z parser.add_option('-v','--verbose',dest='verbose', action='store_true',default=False, help='Print detailed information (default=%default).' ) parser.add_option('-q','--quantities',dest='quantities', default='all', help='Quantity or list of quantities to analyze. See names and abbreviations below (default=%default).' ) parser.add_option('-u','--units',dest='units', default='Ha', help='Desired energy units. Can be Ha (Hartree), Ry (Rydberg), eV (electron volts), kJ_mol (k. joule/mole), K (Kelvin), J (Joules) (default=%default).' ) parser.add_option('-e','--equilibration',dest='equilibration', default='auto', help='Equilibration length in blocks (default=%default).' ) parser.add_option('-a','--average',dest='average', action='store_true',default=False, help='Average over files in each series (default=%default).' ) parser.add_option('--save_average',dest='save_average', action='store_true',default=False, help='Save averaged data into scalar.dat files. (default=%default).' ) parser.add_option('-w','--weights',dest='weights', default='None', help='List of weights for averaging (default=%default).' ) parser.add_option('-j','--join',dest='join', default='None', help='Join data for a range of series (default=%default).' ) parser.add_option('-b','--reblock',dest='reblock', action='store_true',default=False, help='(pending) Use reblocking to calculate statistics (default=%default).' ) parser.add_option('-p','--plot',dest='plot', action='store_true',default=False, help='Plot quantities vs. series (default=%default).' ) parser.add_option('-t','--trace',dest='trace', action='store_true',default=False, help='Plot a trace of quantities (default=%default).' ) parser.add_option('-h','--histogram',dest='histogram', action='store_true',default=False, help='(pending) Plot a histogram of quantities (default=%default).' ) parser.add_option('-o','--overlay',dest='overlay', action='store_true',default=False, help='Overlay plots (default=%default).' ) parser.add_option('--legend',dest='legend', default='upper right', help='Placement of legend. None for no legend, outside for outside legend (default=%default).' ) parser.add_option('--noautocorr',dest='noautocorr', action='store_true',default=False, help='Do not calculate autocorrelation. Warning: error bars are no longer valid! (default=%default).' ) parser.add_option('--noac',dest='noautocorr', action='store_true',default=False, help='Alias for --noautocorr (default=%default).' ) parser.add_option('--sac',dest='show_autocorr', action='store_true',default=False, help='Show autocorrelation of sample data (default=%default).' ) parser.add_option('--sv',dest='show_variance', action='store_true',default=False, help='Show variance of sample data (default=%default).' ) parser.add_option('--sort',dest='sort', action='store_true',default=False, help='Display output data sorted alphabetically by file name (default=%default).' ) parser.add_option('-i','--image',dest='image', action='store_true',default=False, help='(pending) Save image files (default=%default).' ) parser.add_option('-r','--report',dest='report', action='store_true',default=False, help='(pending) Write a report (default=%default).' ) parser.add_option('-s','--show_options',dest='show_options', action='store_true',default=False, help='Print user provided options (default=%default).' ) parser.add_option('-x','--examples',dest='examples', action='store_true',default=False, help='Print examples and exit (default=%default).' ) parser.add_option('--help',dest='help', action='store_true',default=False, help='Print help information and exit (default=%default).' ) parser.add_option('-d', '--desired_error', dest='desired_error', type="float", default=None, help='Show number of samples needed for desired error bar (default=%default).' ) parser.add_option('-n', '--enlarge_system',dest='particle_number', type="int", nargs=2, default=None, help=('Show number of samples needed to maintain ' 'error bar on larger system: desired particle number ' 'first, current particle number second (default=%default)' ) ) parser.add_option('--fp',dest='precision', default=None, help='Sets the floating point precision of displayed statistical results. Must be a floating point format string such as 16.8f, 8.6e, or similar (default=%default).' ) parser.add_option('--nowarn',dest='nowarn', action='store_true',default=False, help='Suppress warning messages (default=%default).' ) parser.add_option('--average_all',dest='average_all', action='store_true',default=False, help='Average over all files, ignoring differences in path (default=%default).' ) parser.add_option('--twist_info',dest='twist_info', default='use', help='Use twist weights in twist_info.dat files or not. Options: "use", "ignore", "require". "use" means use when present, "ignore" means do not use, "require" means must be used (default=%default).' ) unsupported = set('histogram image reblock report'.split()) units = obj(hartree='Ha',ha='Ha',ev='eV',rydberg='Ry',ry='Ry',kelvin='K',kj_mol='kJ_mol',joules='J') options,files_in = parser.parse_args() files_tmp = np.array(files_in) file_order = files_tmp.argsort() if len(files_in)>0: files_in = list(files_tmp[file_order]) #end if QBase.options.transfer_from(options.__dict__) self.verbose = self.options.verbose if self.verbose: self.log('\nQMCA: Qmcpack Analysis Tool') self.log('processing options',n=1) #end if opt = self.options if opt.examples: self.examples() #end if for optname in unsupported: if opt[optname]!=False: self.log('note: option "{0}" is not yet supported'.format(optname),n=1) opt[optname]=False #end if #end for if opt.noautocorr: QBase.autocorr = False #end if u = opt.units.lower() if not u in units: self.error('unrecognized unit system requested: {0}\nvalid units are: {1}'.format(u, sorted(units.keys()))) else: opt.units = units[u] #end if if opt.equilibration!='auto': try: eq = comma_list(opt.equilibration) eq = np.array(eq,dtype=int) if len(eq)==1: e = eq[0] else: e = obj() for s,ev in enumerate(eq): e[s] = ev #end for #end if opt.equilibration = e except: self.error('cannot process equilibration option\nvalue must be an integer or list of integers\nyou provided: '+str(opt.equilibration)) #end if #end if if opt.average: ws = opt.weights if ws=='None': opt.weights = None else: try: w = comma_list(ws) w = np.array(w,dtype=float) if len(w)==len(file_order): w = w[file_order] #end if opt.weights = w weight_failed = len(w.shape)!=1 except: self.error('cannot process weights option\nweights must be a list of real values\nyou provided: {0}'.format(ws)) #end try #end if #end if if opt.join=='None': opt.join=None else: join_failed = False try: j = comma_list(opt.join) j = np.array(j,dtype=int) except: join_failed = True #end try if join_failed or len(j)!=2: self.error('cannot process join option\njoin must be a pair of integers\nyou provided: {0}'.format(ws)) #end if opt.join = j #end if if opt.image=='None': opt.image=None #end if twist_info_options = ('use','ignore','require') if opt.twist_info not in twist_info_options: self.error('twist_info option is invalid\ntwist_info must be one of: {}\nyou provided: {}'.format(twist_info_options,opt.twist_info)) #end if opt.all_quantities = False qabbr = self.quantity_abbreviations if ' ' in opt.quantities: quants = opt.quantities.split() else: quants = [opt.quantities] #end if remove = [] for i in range(len(quants)): qo = quants[i] q = qo.lower() if q=='all': opt.all_quantities=True quants.extend(qabbr.values()) remove.append(qo) elif not qo in qabbr.values(): if q in qabbr: quants[i] = qabbr[q] else: match = False for qa in sorted(qabbr.keys()): qname = qabbr[qa] if qa in q: quants.append(qname) q=q.replace(qa,'') match=True #end if #end for if match and q=='': remove.append(qo) else: self.error('{0} is not a valid quantity\nplease choose quantities from the following list:\n{1}'.format(qo,str(qabbr))) #end if #end if #end if #end for opt.quantities = list(set(quants)-set(remove)) if opt.precision is not None: try: fp = ('{0: '+opt.precision+'}').format(1.234567) except: self.error('floating point format is not a proper format\nformat provided via --fp: {0}\nplease use a standard floating point format statement like 16.8f, 8.6e, or similar'.format(opt.precision)) #end try #end if if opt.show_options or self.verbose: self.log('options provided:',n=1) self.log(str(self.options),n=1) #end if if len(files_in)==0 or opt.help: self.log('no files provided, please see help info below',n=1) self.help() #end if self.file_list.extend(files_in) #end def read_command_line def load_data(self): allowed_extensions = obj(scalar='scalar.dat',dmc='dmc.dat') opt = self.options data = self.data ifile = 0 seriesset = set() for file in self.file_list: if not os.path.exists(file): self.error('file {0} does not exist'.format(file)) #end if isdat = False for filetype,ext in allowed_extensions.items(): if file.endswith(ext): isdat = True #try: d=DatAnalyzer( filepath = file, index = ifile, type = filetype, units = opt.units ) if d.info.load_failed: if not self.options.nowarn: self.warn('Load failed for {}, results unavailable.'.format(file)) #end if continue #end if self.file_map[file] = d prefix = d.info.prefix series = d.info.series seriesset.add(series) if not prefix in data: data[prefix]=obj() self.prefix_list.append(prefix) #end if data[prefix][series] = d #except: # self.log('problem encountered for {0}, results unavailable '.format(file)) ##end try break #end if #end for if not isdat: self.error('{0} is not a valid source of input\nplease aim qmca at files with the following extensions: {1}'.format(file,allowed_extensions.list())) #end if ifile+=1 #end for if opt.average: for prefix,pdata in data.items(): pset = set(pdata.keys()) missing = seriesset-pset if len(missing)>1: self.error('files with prefix {0} are missing series provided for other prefixes\nseries provided for prefix {0}: {1}\nseries missing: {2}'.format(prefix,sorted(list(pset)),sorted(list(missing)))) #end if #end for serieslist = sorted(list(seriesset)) for series in serieslist: filetypes = set() for prefix,pdata in data.items(): filetypes.add(pdata[series].info.type) #end for if len(filetypes)>1: self.error('files with mixed extensions encountered for series {0}\nextensions encountered: {1}\nplease provide files with only one of the following extensions: {2}'.format(series,sorted(list(filetypes)),allowed_extensions.list())) #end if #end for weights = opt.weights prefixes = sorted(list(data.keys())) # identify batches of prefixes for averaging batch_prefixes = obj() for prefix in prefixes: path,file = os.path.split(prefix) tokens = file.split('.') batch_prefix = os.path.join(path,tokens[0]) if batch_prefix not in batch_prefixes: batch_prefixes[batch_prefix] = [] #end if batch_prefixes[batch_prefix].append(prefix) #end for # perform the averaging file_list = [] file_map = obj() prefix_list = [] avg_data = obj() if opt.average_all or weights is not None or len(batch_prefixes)==1: # average all data together if weights is not None: if len(weights)!=len(prefixes): self.error('one weight must be provided per file prefix for averaging\nyou provided {0} weights for {1} prefixes\nweights provided: {2}\nprefixes provided: {3}'.format(len(weights),len(prefixes),weights,prefixes)) #end if else: uniform_weights = len(prefixes)*[1.0] if opt.twist_info=='ignore': weights = uniform_weights else: weights = [] for prefix in sorted(prefixes): twist_info_file = prefix+'.twist_info.dat' if os.path.exists(twist_info_file): fobj = open(twist_info_file) try: weight = float(fobj.read().strip().split()[0]) weights.append(weight) except: None #end try #end if #end for if len(weights)!=len(prefixes): if opt.twist_info=='require': self.error('twist_info files are either missing or mis-formatted for batch prefix {}'.format(batch_prefix)) else: weights = uniform_weights #end if #end if #end if #end if adata = obj() ns=0 for series in serieslist: npf = 0 for prefix in sorted(data.keys()): pdata = data[prefix] w = weights[npf] d = pdata[series] if npf==0: avg = d.copy() di = d.info avg.info.set( filename = di.filename.replace(di.prefix,'avg'), filepath = di.filepath.replace(di.prefix,'avg'), index = ns, prefix = 'avg' ) avg.zero() #end if avg.accumulate(d,w) npf+=1 #end for avg.normalize() file_list.append(avg.info.filepath) file_map[avg.info.filepath] = avg adata[series] = avg ns+=1 #end for prefix_list.append('avg') avg_data.avg = adata else: # average data by shared batch prefix for batch_prefix in sorted(batch_prefixes.keys()): prefixes = batch_prefixes[batch_prefix] adata = obj() ns=0 uniform_weights = len(prefixes)*[1.0] if opt.twist_info=='ignore': weights = uniform_weights else: weights = [] for prefix in sorted(prefixes): twist_info_file = prefix+'.twist_info.dat' if os.path.exists(twist_info_file): fobj = open(twist_info_file) try: weight = float(fobj.read().strip().split()[0]) weights.append(weight) except: None #end try #end if #end for if len(weights)!=len(prefixes): if opt.twist_info=='require': self.error('twist_info files are either missing or mis-formatted for batch prefix {}'.format(batch_prefix)) else: weights = uniform_weights #end if #end if #end if for series in serieslist: npf = 0 for prefix in sorted(prefixes): pdata = data[prefix] w = weights[npf] d = pdata[series] if npf==0: avg = d.copy() di = d.info avg.info.set( filename = di.filename.replace(di.prefix,batch_prefix), filepath = di.filepath.replace(di.prefix,batch_prefix), index = ns, prefix = batch_prefix, ) avg.zero() #end if avg.accumulate(d,w) npf+=1 #end for avg.normalize() file_list.append(avg.info.filepath) file_map[avg.info.filepath] = avg adata[series] = avg ns+=1 #end for prefix_list.append(batch_prefix) avg_data[batch_prefix] = adata #end for #end if self.file_list = file_list self.file_map = file_map self.prefix_list = prefix_list self.data = avg_data #end if if opt.join is not None: series_join = list(range(opt.join[0],opt.join[1]+1)) if len(series_join)>1: sjmin = series_join[0] sjmax = series_join[-1] ds = sjmax-sjmin for prefix,pdata in self.data.items(): for s in series_join: if s not in pdata: self.error('cannot join series {0}\ndata for this series is missing for {1}'.format(s,prefix)) #end if #end for d = pdata[series_join[0]] others = [] for s in series_join[1:]: others.append(pdata[s]) del pdata[s] #end for d.join(others) for s in sorted(pdata.keys()): if s>sjmax: pdata[s-ds] = pdata[s] del pdata[s] #end if #end for #end for eq = opt.equilibration if isinstance(eq,obj): for s in series_join[1:]: del eq[s] #end for for s in sorted(eq.keys()): if s>sjmax: eq[s-ds] = eq[s] del eq[s] #end if #end for #end if #end if #end if #end def load_data def analyze_data(self): for prefix in sorted(self.data.keys()): pdata = self.data[prefix] for series in sorted(pdata.keys()): d = pdata[series] fail = False #try: d.analyze() #except: # self.log('problem encountered for {0}, results unavailable '.format(d.info.filepath)) # fail = True ##end try if fail: del pdata[series] #end if #end for #end for opt = self.options if opt.average and opt.save_average: for fmap in self.file_map: ndata = len(fmap.data.first()) data = [np.arange(ndata)] keys = 'index' fmt = ['%i'] for key in fmap.data.keys(): data.append(fmap.data[key]) keys+=' {}'.format(key) fmt.append('%.10e') #end for data = np.array(data).transpose() np.savetxt(fmap.info.filepath,data,header=keys,fmt=fmt) #end for #end if show_text = True plots = opt.plot or opt.trace or opt.histogram if opt.sort: prefix_list = sorted(self.data.keys()) else: prefix_list = self.prefix_list #end if if show_text: first_prefix = True for prefix in prefix_list: pdata = self.data[prefix] first_series = True only_series = len(pdata)==1 for series in sorted(pdata.keys()): d = pdata[series] d.write_stats(opt.quantities,first_prefix,first_series,only_series) first_series=False #end for first_prefix = False #end for #end if if plots: self.check_matplotlib_import() color = 'b' line = '-' marker = '.' plotstyle = color+marker+line tracestyle = color+line histstyle = color+line iplot,itrace,ihist = list(range(3)) modes = [opt.plot,opt.trace,opt.histogram] quantities = [] for q in opt.quantities: all_present = True for pdata in self.data: for d in pdata: all_present &= q in d.data #end for #end for if all_present: quantities.append(q) #end if #end for for quantity in quantities: for mode in range(len(modes)): if modes[mode]: npf = 0 for prefix in prefix_list: first_prefix = npf==0 last_prefix = npf==len(self.data)-1 npf+=1 if first_prefix or not opt.overlay: fig = plt.figure() ax = fig.add_subplot(111) xminp = 1e99 xmaxp = -1e99 yminp = 1e99 ymaxp = -1e99 color_wheel.reset() #end if lcolor,lstyle = color_wheel.next_line() mlcolor,mlstyle = color_wheel.next_marker_line() xvals = [] yvals = [] yerrs = [] shifts = [0] series_list = [] pdata = self.data[prefix] only_series = len(pdata)==1 shift = 0 ns = 0 for series in sorted(pdata.keys()): first_series = ns==0 ns+=1 d = pdata[series] if mode==iplot: xlims,ylims,xv,yv,ye = d.plot_series(quantity,plotstyle,prefix) xvals.append(xv) yvals.append(yv) yerrs.append(ye) elif mode==itrace: xlims,ylims,xv,yv = d.plot_trace(quantity,tracestyle,prefix,shift) xvals.extend(xv) yvals.extend(yv) elif mode==ihist: xlims,ylims = d.plot_histogram(quantity,histstyle,prefix) #end if xminp = min(xminp,xlims[0]) xmaxp = max(xmaxp,xlims[1]) yminp = min(yminp,ylims[0]) ymaxp = max(ymaxp,ylims[1]) shift += d.info.nsamples shifts.append(shift) series_list.append(series) #end for if mode==iplot: plt.errorbar(xvals,yvals,yerrs,color=mlcolor,fmt=mlstyle,label=prefix) elif mode==itrace: plt.plot(xvals,yvals,lstyle,color=lcolor,label=prefix) #end if if last_prefix or not opt.overlay: dx = max(.1*abs(xminp-xmaxp),1) xminp -= dx xmaxp += dx dy = .1*abs(yminp-ymaxp) yminp -= dy ymaxp += dy plt.xlim([xminp,xmaxp]) plt.ylim([yminp,ymaxp]) if mode==iplot: plt.xlabel('series') plt.ylabel(quantity) plt.title(quantity+' vs. series') elif mode==itrace: for ishift in range(0,len(shifts)-1): s1 = shifts[ishift] s2 = shifts[ishift+1] smid = (s1+s2)/2 if s2-s1