qmcpack/nexus/lib/vasp_analyzer.py

##################################################################
##  (c) Copyright 2015-  by Jaron T. Krogel                     ##
##################################################################


#====================================================================#
#  vasp_analyzer.py                                                  #
#    Supports analysis of vasp output data including OUTCAR and      #
#    xml data (vasprun.xml).  All data (bands, forces, stress, etc.) #
#    are available in a structured format.                           #
#                                                                    #
#  Content summary:                                                  #
#    VaspAnalyzer                                                    #
#      SimulationAnalyzer class for VASP.                            #
#                                                                    #
#    OutcarData                                                      #
#      Reads/processes data from OUTCAR file.                        #
#                                                                    #
#    VaspLines                                                       #
#      Convenience class for line advancing read of OUTCAR file.     #
#                                                                    #
#    read_vxml                                                       #
#      Function to read vasprun.xml.  Returns nested object          #
#      representation of the file.                                   #
#                                                                    #
#    VXML                                                            #
#      Represents an xml node of vasprun.xml.                        #
#                                                                    #
#====================================================================#


import os
from numpy import array,zeros
from generic import obj
from simulation import Simulation,SimulationAnalyzer
from vasp_input import VaspInput,Incar
from developer import DevBase
from debug import *


# vasp xml reader classes/functions

class VXML(DevBase):
    basic_types = set('i v dimension field set time'.split())

    data_types = obj(int=int,string=str,float=float)

    def __init__(self,tag,attr=None):
        self._tag   = tag
        self._lines = []
        self._attr  = None
        self._value = None
        if attr is not None and len(attr)>0:
            self._attr = obj()
            tokens = attr.split('"')
            next=0
            for token in tokens:
                next+=1
                if len(token)>0 and token[-1]=='=':
                    name = token.strip()[:-1]
                    self._attr[name]=tokens[next].replace(' ','_').replace('-','_').lower()
                #end if
            #end for
        #end if
    #end def __init__

    def _is_empty(self):
        return len(self)-4==0
    #end def _is_empty

    def _add(self,new):
        tag = new._tag
        if not tag in self:
            self[tag] = new
        else:
            cur = self[tag]
            if 0 in cur:
                cur._append(new)
            else:
                coll = VXMLcoll(tag)
                coll._append(cur)
                coll._append(new)
                self[tag] = coll
            #end if
        #end if
    #end def _add

    def _parse(self):
        # rename sub objects if name is present
        for name in list(self.keys()):
            value = self[name]
            if isinstance(value,VXML):
                if value._attr is not None and 'name' in value._attr:
                    del self[name]
                    self[value._attr.name] = value
                    del value._attr.name
                elif isinstance(value,VXMLcoll):
                    for n in list(value.keys()):
                        v = value[n]
                        if isinstance(v,VXML):
                            if v._attr is not None and 'name' in v._attr:
                                del value[n]
                                self[v._attr.name] = v
                                del v._attr.name
                            #end if
                        #end if
                    #end for
                    if len(value)==0:
                        del self[name]
                    else:
                        value._reorder()
                    #end if
                #end if
            #end if
        #end for
        # have all sub objects parse (read fields, set _value)
        for v in self:
            if isinstance(v,VXML):
                v._parse()
            #end if
        #end for
        lines = self._lines
        if len(lines)>0:
            #fail = False
            #try:
            if self._tag=='array':
                self._parse_array(lines)
            else:
                self._parse_values(lines)
            #end if
            #except Exception,e:
            #    print '_parse failed!'
            #    print e
            #    print self
            #    print tag
            #    print attr
            #    print lines[0:min(10,len(lines))]
            #    print
            #    print
            #    fail = True
            ##end try
            #if fail:
            #    self.error('parse failed please see information above','read_vxml')
            ##end if
        #end if

        # if sub-objects resolve to a value, replace with that value
        for name in list(self.keys()):
            value = self[name]
            if isinstance(value,VXML) and value._value is not None:
                self[name] = value._value
            #end if
        #end for

        # assign attributes
        if self._attr is not None:
            if 'type' in self._attr:
                del self._attr.type
            #end if
            self.transfer_from(self._attr)
        #end if

        return
    #end def _parse


    def _parse_values(self,lines):
        if len(lines)==1 and not '<' in lines[0]:
            self._value = readval(lines[0])
        else:
            arr = None
            for line in lines:
                start = line.startswith('<') and not line.startswith('</')
                end   = line.endswith('>')
                if start:
                    fline = line
                else:
                    fline += line
                #end if
                if end:
                    tokens = fline.replace('<','|').replace('>','|').split('|')
                    tn = tokens[1]
                    val = readval(tokens[2].strip())
                    if 'name=' in tn:
                        name = tn.split('name="',1)[1].split('"')[0].lower()
                        self[name] = val
                    elif arr is None:
                        arr = [val]
                    else:
                        arr.append(val)
                    #end if
                #end if
            #end for
            if arr is not None:
                self._value = array(arr)
            #end if
        #end if
    #end def parse_values


    def _parse_array(self,lines):
        #print 'parsing array'
        dims       = obj()
        fields     = obj()
        dim_counts = None
        field_list = []
        level      = -1
        set_dims   = False
        for line in lines:
            if line.startswith('<'):
                if line.startswith('<dimension'):
                    tokens = line.replace('<','|').replace('>','|').split('|')
                    tn = tokens[1]
                    dname = tokens[2].lower().replace(' ','_').replace('-','_')
                    if 'dim=' in tn:
                        d = int(tn.split('dim="',1)[1].split('"')[0])
                        dims[d] = dname
                    else:
                        dims.append(dname)
                    #end if
                elif line.startswith('<field'):
                    tokens = line.replace('<','|').replace('>','|').split('|')
                    tn = tokens[1]
                    fname = tokens[2].lower().replace(' ','_').replace('-','_')
                    if 'type=' in tn:
                        t = tn.split('type="',1)[1].split('"')[0]
                        if t in VXML.data_types:
                            dtype = VXML.data_types[t]
                        else:
                            self.error('field type {0} is unrecognized: {1}'.format(t,line))
                        #end if
                    else:
                        dtype = float
                    #end if
                    fields.append(obj(name=fname,dtype=dtype))
                elif line.startswith('<set'):
                    if not set_dims:
                        dims   = dims.list()
                        dims.reverse()
                        dims = tuple(dims)
                        dim_counts = zeros((len(dims),),dtype=int)
                        set_dims = True
                    #end if
                    level += 1
                    dim_counts[level]=0
                elif line.startswith('</set'):
                    level -= 1
                    if level!=-1:
                        dim_counts[level]+=1
                    #end if
                else:
                    self.error('array parsing failed\n unrecognized xml encountered: {0}'.format(line),'read_vxml')
                #end if
            else:
                dim_counts[level]+=1
                field_list.append(line.split())
            #end if
        #end for
        self.dims   = dims
        for findex,field in fields.items():
            lst = []
            for field_vals in field_list:
                lst.append(field_vals[findex])
            #end for
            arr = array(lst,dtype=field.dtype).ravel()
            arr.shape = tuple(dim_counts)
            self[field.name] = arr
        #end for
        #print '  done'
    #end def _parse_array


    def _remove_empty(self):
        for n in list(self.keys()):
            v = self[n]
            if isinstance(v,VXML):
                v._remove_empty()
                if isinstance(v,VXMLcoll) and v._is_empty():
                    del self[n]
                #end if
            #end if
        #end for
    #end def _remove_empty


    def _remove_hidden(self):
        del self._tag
        del self._attr
        del self._lines
        del self._value
        for v in self:
            if isinstance(v,VXML):
                v._remove_hidden()
            #end if
        #end for
    #end def _remove_hidden()
#end class VXML


class VXMLcoll(VXML):
    def _append(self,new):
        index = len(self)-4
        self[index]=new
    #end def _append

    def _reorder(self):
        n=0
        for key in self.sorted_keys():
            value = self[key]
            if isinstance(value,VXML):
                del self[key]
                self[n]=value
                n+=1
            #end if
        #end for
    #end def _reorder
#end class VXMLcoll


booldict = dict(T=True,F=False)

def readval(val):
    fail = False
    split = False
    if isinstance(val,str):
        split = ' ' in val
    #end if
    if isinstance(val,list) or split:
        if split:
            val = val.split()
        #end if
        try:
            v = array(val,dtype=int)
        except:
            try:
                v = array(val,dtype=float)
            except:
                try:
                    v = array(val,dtype=str)
                except:
                    fail = True
                #end try
            #end try
        #end try
    elif val in booldict:
        v = booldict[val]
    else:
        try:
            v = int(val)
        except:
            try:
                v = float(val)
            except:
                v = val
            #end try
        #end try
    #end if
    if fail:
        VXML.class_error('failed to read value: "{0}"'.format(val),'read_vxml')
    #end if
    return v
#end def readval


def read_vxml(filepath):
    if not os.path.exists(filepath):
        VXML.class_error('file {0} does not exist'.format(filepath),'read_vxml')
    #end if
    #print 'read'
    contents = open(filepath,'r').read()
    #print 'replace'
    contents = contents.replace('<rc>',' ').replace('</rc>',' ')
    contents = contents.replace('<r>' ,' ').replace('</r>' ,' ')
    contents = contents.replace('<c>' ,' ').replace('</c>' ,' ')
    #print 'split lines'
    lines    = contents.splitlines()

    #print 'process lines'
    root = VXML('vasprun')
    stack = [root]
    cur = stack[0]
    for line in lines:
        ls = line.strip()
        if ls.startswith('</'):
            tag = ls[2:-1]
            if tag==cur._tag:
                stack.pop()
                cur = stack[-1]
                #print len(stack)*'  '+'end '+tag
            else:
                cur._lines.append(ls)
            #end if
        elif ls.startswith('<?'):
            None
        elif ls.startswith('<'):
            ta,rest = ls[1:].split('>',1)
            tokens = ta.split(' ',1)
            tag = tokens[0]
            if not tag in VXML.basic_types:
                if len(tokens)==1:
                    attr = None
                else:
                    attr = tokens[1].strip()
                #end if
                if ls.endswith('</{0}>'.format(tag)):
                    new = VXML(tag,attr)
                    new._lines.append(ls.replace('<','|').replace('>','|').split('|')[2])
                    cur._add(new)
                    #print len(stack)*'  '+'end '+tag
                else:
                    #print len(stack)*'  '+tag
                    new = VXML(tag,attr)
                    cur._add(new)
                    cur = new
                    stack.append(cur)
                #end if
            else:
                cur._lines.append(ls)
            #end if
        else:
            cur._lines.append(ls)
        #end if
    #end for

    if len(stack)!=1:
        VXML.class_error('read failed\nxml tree did not seem to close')
    #end if

    #print 'parse'
    root._parse()
    root._remove_empty()
    root._remove_hidden()

    #print 'done'
    return root
#end def read_vxml


# vasp outcar functions

class VaspLines(DevBase):
    def __init__(self,lines):
        self.pointer = 0
        self.lines   = lines
    #end def __init__

    def advance_line(self,amount):
        self.pointer += amount
        return self.lines[self.pointer]
    #end def advance_line

    def advance_token(self,token):
        psave = self.pointer
        for line in self.lines[self.pointer:]:
            if token in line:
                return line
            #end if
            self.pointer += 1
        #end while
        self.pointer = psave
        return None
    #end def advance

    def advance(self,amount):
        self.pointer += amount
    #end def advance

    def remainder(self):
        return self.lines[self.pointer:]
    #end def remainder

    def rewind(self,point=0):
        self.pointer = point
    #end def rewind

    def get_line(self,point=None):
        if point is None:
            point = self.pointer
        #end if
        return self.lines[point]
    #end def get_line

    def get_line_ahead(self,nahead):
        return self.lines[self.pointer+nahead]
    #end def get_line_ahead
#end class VaspLines


def read_outcar_header_values(vlines,odata):
    line = vlines.advance_token('TOTEN')
    odata.total_energy = float(line.split()[4])
    vlines.advance_token('energy without entropy')
#end def read_outcar_header_values


def read_outcar_core_potentials(vlines,odata):
    line = vlines.advance_token('the test charge radii are')
    odata.core_potential_radii = array(line.split()[5:],dtype=float)
    vlines.advance(2)
    n = 0
    cpots = []
    for line in vlines.remainder():
        ls = line.strip()
        n+=1
        if len(ls)==0:
            break
        #end if
        tokens = line.replace('-',' -').split()
        cpots.extend(tokens[1::2])
    #end for
    odata.core_potentials = array(cpots,dtype=float)
    vlines.advance(n)
#end def read_outcar_core_potentials


def read_outcar_fermi_energy(vlines,odata):
    line = vlines.advance_token('E-fermi')
    odata.Efermi = float(line.split()[2])
#end def read_outcar_fermi_energy


def read_outcar_bands(vlines,odata):
    bands = obj()
    line = vlines.advance_token('spin component')
    if line is not None:
        last_empty = True
        n  = 0
        for line in vlines.remainder():
            if len(line)>2:
                if line[1]=='s':
                    ns = int(line.split()[2])
                    spin = obj()
                    bands[ns] = spin
                elif line[1]=='k':
                    tokens = line.split()
                    nk = int(tokens[1])
                    kp = array(tokens[3:],dtype=float)
                    kpoint = obj(kpoint=kp,energies=[],occupations=[])
                    spin[nk]=kpoint
                elif line[2]=='b':
                    None
                else:
                    bnum,energy,occ = line.split()
                    kpoint.energies.append(float(energy))
                    kpoint.occupations.append(float(occ))
                #end if
                last_empty = False
            else:
                if last_empty:
                    break
                #end if
                last_empty = True
            #end if
            n+=1
        #end for
        vlines.advance(n)
    #end if
    for ns,spin in bands.items():
        for nk,kpoint in spin.items():
            kpoint.energies    = array(kpoint.energies,dtype=float)
            kpoint.occupations = array(kpoint.occupations,dtype=float)
        #end for
    #end for
    odata.bands = bands
#end def read_outcar_bands


def read_outcar_charge_mag(vlines,odata,token):
    ion   = obj(s=[],p=[],d=[],tot=[])
    total = obj()
    vlines.advance_token(token)
    vlines.advance(4)
    prev_end = False
    n=0
    for line in vlines.remainder():
        n+=1
        if prev_end:
            break
        #end if
        if line[0]=='-':
            prev_end = True
        else:
            vals = array(line.split()[1:],dtype=float)
            ion.s.append(vals[0])
            ion.p.append(vals[1])
            ion.d.append(vals[2])
            ion.tot.append(vals[3])
        #end if
    #end for
    for channel,vals in ion.items():
        ion[channel] = array(vals,dtype=float)
    #end for
    vlines.advance(n)
    vals = array(line.split()[1:],dtype=float)
    total.s   = vals[0]
    total.p   = vals[1]
    total.d   = vals[2]
    total.tot = vals[3]
    return ion,total
#end def read_outcar_charge_mag


def read_outcar_total_charge(vlines,odata):
    ion,total = read_outcar_charge_mag(vlines,odata,'total charge ') # trailing space is important
    odata.ion_charge   = ion
    odata.total_charge = total
#end def read_outcar_total_charge


def read_outcar_magnetization(vlines,odata):
    ion,total = read_outcar_charge_mag(vlines,odata,'magnetization')
    odata.ion_magnetization   = ion
    odata.total_magnetization = total
#end def read_outcar_magnetization


def read_outcar_stress(vlines,odata):
    vlines.advance_token('FORCE on cell')
    line = vlines.advance_line(1)
    dirs = line.split()[1:]
    st       = array(vlines.advance_token('Total').split()[1:],dtype=float)
    st_kb    = array(vlines.advance_line(1).split()[2:],dtype=float)
    pressure = float(vlines.advance_line(1).split()[3])
    stress = obj()
    stress_kb = obj()
    for i in range(len(dirs)):
        d = dirs[i].lower()
        stress[d]    = st[i]
        stress_kb[d] = st_kb[i]
    #end for
    odata.stress    = stress
    odata.stress_kb = stress_kb
    odata.pressure  = pressure
#end def read_outcar_stress


def read_outcar_cell(vlines,odata):
    vlines.advance_token('VOLUME and BASIS')
    volume = float(vlines.advance_line(3).split()[-1])
    a1 = vlines.advance_line(2).split()[0:3]
    a2 = vlines.advance_line(1).split()[0:3]
    a3 = vlines.advance_line(1).split()[0:3]
    lattice_vectors = array([a1,a2,a3],dtype=float)
    odata.volume = volume
    odata.lattice_vectors = lattice_vectors
#end def read_outcar_cell


def read_outcar_position_force(vlines,odata):
    position    = []
    force       = []
    vlines.advance_token('POSITION')
    vlines.advance(2)
    prev_end = False
    for line in vlines.remainder():
        if prev_end:
            break
        #end if
        if line[1]=='-':
            prev_end = True
        else:
            tokens = line.split()
            position.append(tokens[0:3])
            force.append(tokens[3:6])
        #end if
    #end for
    total_drift = line.split()[2:5]
    odata.position = array(position,dtype=float)
    odata.force    = array(force,dtype=float)
    odata.total_drift = array(total_drift,dtype=float)
#end def read_outcar_position_force


def read_outcar_accounting(vlines,odata):
    time = obj()
    memory = obj()
    vlines.advance_token('General timing and accounting')
    vlines.advance(2)
    time.cpu     = float(vlines.advance_line(1).split()[-1])
    time.user    = float(vlines.advance_line(1).split()[-1])
    time.system  = float(vlines.advance_line(1).split()[-1])
    time.elapsed = float(vlines.advance_line(1).split()[-1])
    vlines.advance(1)
    memory.maximum = float(vlines.advance_line(1).split()[-1])
    memory.average = float(vlines.advance_line(1).split()[-1])
    odata.time   = time
    odata.memory = memory
#end def read_outcar_accounting


class OutcarData(DevBase):
    any_functions = [
        ('header_values'   , read_outcar_header_values  ),
        ]
    elast_functions = [
        ('core_potentials' , read_outcar_core_potentials),
        ('fermi_energy'    , read_outcar_fermi_energy   ),
        ('bands'           , read_outcar_bands          ),
        ('total_charge'    , read_outcar_total_charge   ),
        ('magnetization'   , read_outcar_magnetization  ),
        ('stress'          , read_outcar_stress         ),
        ('cell'            , read_outcar_cell           ),
        ('position_force'  , read_outcar_position_force ),
        ]
    ilast_functions = [
        ('accounting'      , read_outcar_accounting     ),
        ]

    read_outcar_functions = any_functions + elast_functions + ilast_functions

    def __init__(self,filepath=None,lines=None):
        if filepath is not None:
            if not os.path.exists(filepath):
                self.error('file {0} does not exist'.format(filepath))
            #end if
            f = open(filepath,'r')
            lines = f.read().splitlines()
            f.close()
        #end if
        self.vlines   = VaspLines(lines)
    #end def __init__


    def read(self,ilast=False,elast=False,all=True):
        ilast |= all
        elast |= all
        vlines = self.vlines
        del self.vlines
        read_functions = []
        read_functions.extend(self.any_functions)
        if elast:
            read_functions.extend(self.elast_functions)
            if ilast:
                read_functions.extend(self.ilast_functions)
            #end if
        #end if
        for quantity,read_function in read_functions:
            try:
                read_function(vlines,self)
            except:
                None
            #end try
        #end for
    #end def read
#end class OutcarData


# main analyzer class

class VaspAnalyzer(SimulationAnalyzer):
    def __init__(self,arg0=None,xml=False,analyze=False):
        path     = None
        prefix   = None
        incar    = None
        outcar   = None
        xml_file = None
        neb      = False
        if isinstance(arg0,Simulation):
            sim = arg0
            file = sim.infile
            path   = sim.locdir
        elif arg0 is not None:
            path,file = os.path.split(arg0)
        else:
            file = ''
            path = ''
            xml  = False
        #end if
        if len(file)>0:
            if file.endswith('INCAR'):
                incar  = file
                prefix = file.replace('INCAR','').strip()
            elif file.endswith('OUTCAR'):
                prefix = file.replace('OUTCAR','').strip()
            else:
                self.error('please provide the path to an INCAR or OUTCAR file')
            #end if
            incar   = prefix+'INCAR'
            outcar  = prefix+'OUTCAR'
            xml_file = prefix+'vasprun.xml'
            if prefix=='':
                prefix=None
            #end if
        #end if
        incar_file  = incar
        outcar_file = outcar
        if incar_file is not None:
            incar = None
            incar_path = os.path.join(path,incar_file)
            neb = False
            if incar_file is not None and os.path.exists(incar_path):
                incar = Incar(incar_path)
                if 'images' in incar:
                    neb = True
                #end if
            #end if
        #end if
        self.info = obj(
            path         = path,
            prefix       = prefix,
            incar_file   = incar_file,
            outcar_file  = outcar_file,
            incar        = incar,
            xml_file     = xml_file,
            xml          = xml,
            neb          = neb,
            )
        if analyze:
            self.analyze()
        #end if
    #end def __init__


    def analyze(self,outcar=None):
        if self.info.neb:
            self.neb_analyzers = obj()
            for i in range(self.info.incar.images):
                n = i+1
                self.neb_analyzers[n] = VaspAnalyzer(
                    arg0    =  os.path.join(self.info.path,str(n).zfill(2),'OUTCAR'),
                    xml     = self.info.xml,
                    analyze = True
                    )
            #end for
            return
        #end if

        if outcar is None and self.info.outcar_file is not None:
            outcar = os.path.join(self.info.path,self.info.outcar_file)
        #ned if
        if self.info.xml:
            self.xmldata = read_vxml(os.path.join(self.info.path,self.info.xml_file))
        #end if
        if outcar is not None:
            self.analyze_outcar(outcar)
        #end if
    #end def analyze


    def analyze_outcar(self,outcar):
        if not os.path.exists(outcar):
            self.error('outcar file {0} does not exist'.format(outcar))
        #end if
        oc = open(outcar,'r')
        lines = oc.read().splitlines()
        oc.close()
        del oc
        # gather initialization lines
        init = []
        n = 0
        for line in lines:
            if len(line)>0 and line[0]=='-' and 'Iteration' in line:
                break
            #end if
            init.append(line)
            n+=1
        #end for
        # gather lines for each iteration
        ion_steps = obj()
        for line in lines[n:]:
            if len(line)>0 and line[0]=='-' and 'Iteration' in line:
                iteration = []
                inum,enum = line.strip(' -Iteration)').split('(')
                inum = int(inum)
                enum = int(enum)
                if not inum in ion_steps:
                    ion_steps[inum] = obj()
                #end if
                ion_steps[inum][enum] = OutcarData(lines=iteration)
            #end if
            iteration.append(line)
        #end for
        del lines
        del n
        # read data from each iteration
        if len(ion_steps)>0:
            imax = array(list(ion_steps.keys()),dtype=int).max()
            for inum,ion_step in ion_steps.items():
                ilast = inum==imax
                if len(ion_step)>0:
                    emax = array(list(ion_step.keys()),dtype=int).max()
                    for enum,elec_step in ion_step.items():
                        elast = enum==emax
                        elec_step.read(ilast,elast,all=False)
                        if ilast and elast:
                            self.transfer_from(elec_step)
                        #end if
                    #end for
                #end if
            #end for
        #end if
        self.ion_steps = ion_steps
    #end def analyze_outcar
#end class VaspAnalyzer