Add output parser for abinit dimensions

This commit is contained in:
Matteo Giantomassi 2017-11-30 12:01:28 +01:00
parent a6c1143771
commit 89645d377a
6 changed files with 239 additions and 18 deletions

View File

@ -414,6 +414,7 @@ class AbinitOutputFile(AbinitTextFile, NotebookWriter):
return self.to_string()
def to_string(self, verbose=0):
"""String representation."""
lines = ["ndtset: %d, completed: %s" % (self.ndtset, self.run_completed)]
app = lines.append
@ -446,6 +447,119 @@ class AbinitOutputFile(AbinitTextFile, NotebookWriter):
return "\n".join(lines)
def get_dims_spginfo_dataset(self, verbose=0):
"""
Parse the section with the dimensions of the calculation.
Args:
verbose: Verbosity level.
Return: (dims_dataset, spginfo_dataset)
where dims_dataset[i] is an OrderedDict with the dimensions of dataset `i`
spginfo_dataset[i] is a dictionary with space group information.
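Example (sketch, not part of the API docs; assumes `abo` is an open AbinitOutputFile):
dims, spginfo = abo.get_dims_spginfo_dataset()
print(dims[1]["mpw"], spginfo[1]["spg_symbol"])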
"""
# With a single dataset we have to parse a section like:
#
# Symmetries : space group Fd -3 m (#227); Bravais cF (face-center cubic)
# ================================================================================
# Values of the parameters that define the memory need of the present run
# intxc = 0 ionmov = 0 iscf = 7 lmnmax = 6
# lnmax = 6 mgfft = 18 mpssoang = 3 mqgrid = 3001
# natom = 2 nloc_mem = 1 nspden = 1 nspinor = 1
# nsppol = 1 nsym = 48 n1xccc = 2501 ntypat = 1
# occopt = 1 xclevel = 2
# - mband = 8 mffmem = 1 mkmem = 29
# mpw = 202 nfft = 5832 nkpt = 29
# ================================================================================
# P This job should need less than 3.389 Mbytes of memory.
# Rough estimation (10% accuracy) of disk space for files :
# _ WF disk file : 0.717 Mbytes ; DEN or POT disk file : 0.046 Mbytes.
# ================================================================================
# With multiple datasets we have to parse sections like:
# DATASET 2 : space group F-4 3 m (#216); Bravais cF (face-center cubic)
# ================================================================================
# Values of the parameters that define the memory need for DATASET 2.
# intxc = 0 ionmov = 0 iscf = 7 lmnmax = 2
# lnmax = 2 mgfft = 12 mpssoang = 3 mqgrid = 3001
# natom = 2 nloc_mem = 1 nspden = 1 nspinor = 1
# nsppol = 1 nsym = 24 n1xccc = 2501 ntypat = 2
# occopt = 1 xclevel = 1
# - mband = 10 mffmem = 1 mkmem = 2
# mpw = 69 nfft = 1728 nkpt = 2
# ================================================================================
# P This job should need less than 1.331 Mbytes of memory.
# Rough estimation (10% accuracy) of disk space for files :
# _ WF disk file : 0.023 Mbytes ; DEN or POT disk file : 0.015 Mbytes.
# ================================================================================
magic = "Values of the parameters that define the memory need"
memory_pre = "P This job should need less than"
magic_exit = "------------- Echo of variables that govern the present computation"
filesizes_pre = "_ WF disk file :"
def parse_spgline(line):
"""Parse the line with space group info, return dict."""
# Could use regular expressions ...
i = line.find("space group")
spg_str, brav_str = line[i:].replace("space group", "").split(";")
toks = spg_str.split()
return {
"spg_symbol": "".join(toks[:-1]),
"spg_number": int(toks[-1].replace("(", "").replace(")", "").replace("#", "")),
"bravais": brav_str.strip(),
}
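# Example (sketch): for the header line
#   "DATASET 2 : space group F-4 3 m (#216); Bravais cF (face-center cubic)"
# parse_spgline returns:
#   {"spg_symbol": "F-43m", "spg_number": 216, "bravais": "Bravais cF (face-center cubic)"}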
from abipy.tools.numtools import grouper
dims_dataset, spginfo_dataset = OrderedDict(), OrderedDict()
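# Simple state machine: inblock == 0 before any dataset header, 1 after the line
# with the space group info, 2 once inside the section with the dimensions.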
inblock = 0
with open(self.filepath, "rt") as fh:
for line in fh:
line = line.strip()
if line.startswith(magic_exit):
break
if not line or line.startswith("===") or line.startswith("---") or line.startswith("Rough estimation"):
continue
if verbose: print("inblock:", inblock, " at line:", line)
if line.startswith("DATASET") or line.startswith("Symmetries :"):
# Get dataset index, parse space group and lattice info, init new dims dict.
inblock = 1
if line.startswith("Symmetries :"):
# No multidataset
dtindex = 1
else:
tokens = line.split()
dtindex = int(tokens[1])
dims_dataset[dtindex] = dims = OrderedDict()
spginfo_dataset[dtindex] = parse_spgline(line)
continue
if inblock == 1 and line.startswith(magic):
inblock = 2
continue
if inblock == 2:
# Lines with data.
if line.startswith(memory_pre):
dims["mem_per_proc_mb"] = float(line.replace(memory_pre, "").split()[0])
elif line.startswith(filesizes_pre):
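# The file sizes are the tokens that precede the two "Mbytes" markers.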
tokens = line.split()
mbpos = [i - 1 for i, t in enumerate(tokens) if t.startswith("Mbytes")]
assert len(mbpos) == 2
dims["wfk_size_mb"] = float(tokens[mbpos[0]])
dims["denpot_size_mb"] = float(tokens[mbpos[1]])
else:
if line and line[0] == "-": line = line[1:]
tokens = grouper(2, line.replace("=", "").split())
if verbose: print("tokens:", tokens)
dims.update([(t[0], int(t[1])) for t in tokens])
return dims_dataset, spginfo_dataset
def next_gs_scf_cycle(self):
"""
Return the next :class:`GroundStateScfCycle` in the file. None if not found.

View File

@ -30,7 +30,6 @@ class AbinitOutputTest(AbipyTest):
with AbinitOutputFile(abo_path) as abo:
repr(abo); str(abo)
assert abo.to_string(verbose=2)
assert abo.version == "8.0.6"
assert abo.run_completed
assert not abo.dryrun_mode
@ -43,6 +42,25 @@ class AbinitOutputTest(AbipyTest):
assert abo.initial_structure == abo.final_structure
abo.diff_datasets(1, 2, dryrun=True)
# Test the parsing of dimensions and spginfo.
dims_dataset, spginfo_dataset = abo.get_dims_spginfo_dataset()
assert len(dims_dataset) == 2 and list(dims_dataset.keys()) == [1, 2]
dims1 = dims_dataset[1]
assert dims1["iscf"] == 7
assert dims1["nfft"] == 5832
self.assert_almost_equal(dims1["mem_per_proc_mb"], 3.045)
self.assert_almost_equal(dims1["wfk_size_mb"], 0.717)
self.assert_almost_equal(dims1["denpot_size_mb"], 0.046)
assert spginfo_dataset[1]["spg_symbol"] == "Fd-3m"
assert spginfo_dataset[1]["spg_number"] == 227
assert spginfo_dataset[1]["bravais"] == "Bravais cF (face-center cubic)"
dims2 = dims_dataset[2]
assert dims2["iscf"] == -2
assert dims2["n1xccc"] == 2501
self.assert_almost_equal(dims2["mem_per_proc_mb"], 1.901)
self.assert_almost_equal(dims2["wfk_size_mb"], 0.340)
self.assert_almost_equal(dims2["denpot_size_mb"], 0.046)
print(abo.events)
gs_cycle = abo.next_gs_scf_cycle()
assert gs_cycle is not None
@ -107,6 +125,20 @@ class AbinitOutputTest(AbipyTest):
assert abo.initial_structure.abi_spacegroup is not None
# This tests get_dims_spginfo_dataset with a single dataset.
dims_dataset, spg_dataset = abo.get_dims_spginfo_dataset()
assert len(dims_dataset) == 1
dims = dims_dataset[1]
assert dims["nsppol"] == 1
assert dims["nsym"] == 48
assert dims["nkpt"] == 29
self.assert_almost_equal(dims["mem_per_proc_mb"], 3.389)
self.assert_almost_equal(dims["wfk_size_mb"], 0.717)
self.assert_almost_equal(dims["denpot_size_mb"], 0.046)
assert spg_dataset[1]["spg_symbol"] == "Fd-3m"
assert spg_dataset[1]["spg_number"] == 227
assert spg_dataset[1]["bravais"] == "Bravais cF (face-center cubic)"
def test_all_outputs_in_tests(self):
"""
Try to parse all Abinit output files inside the Abinit `tests` directory.

View File

@ -92,7 +92,7 @@ class A2f(object):
# TODO: Add ElectronDos
app("Isotropic lambda: %.3f" % (self.lambda_iso))
app("Omega_log: %s [eV], %s [K]" % (self.omega_log, self.omega_log * abu.eV_to_K))
for mustar in (0.1, 0.2):
for mustar in (0.1, 0.12, 0.2):
app("\tFor mustar %s: McMillan Tc: %s [K]" % (mustar, self.get_mcmillan_tc(mustar)))
if verbose:
@ -114,20 +114,15 @@ class A2f(object):
"""
#return 270 / abu.eV_to_K
iw = self.iw0 + 1
#iw = self.iw0 + 100
wmesh, a2fw = self.mesh[iw:], self.values[iw:]
#wmesh = wmesh * units.eV_to_Ha
#wmesh = wmesh * abu.eV_to_THz
#ax, fig, plt = get_ax_fig_plt(ax=None)
#ax.plot(wmesh, a2fw / wmesh * np.log(wmesh))
#plt.show()
integral = simps(a2fw / wmesh * np.log(wmesh), x=wmesh)
#return np.exp(2.0 / self.lambda_iso * integral) * units.Ha_to_eV
#return np.exp(2.0 / self.lambda_iso * integral) / abu.eV_to_THz
return np.exp(2.0 / self.lambda_iso * integral) #/ abu.eV_to_THz
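# Definition used below (note the 1/lambda_iso prefactor):
#   omega_log = exp[ (1/lambda_iso) \int dw a2F(w) ln(w) / w ]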
fw = a2fw / wmesh * np.log(wmesh)
integral = simps(fw, x=wmesh)
return np.exp(1.0 / self.lambda_iso * integral)
def get_moment(self, n, spin=None, cumulative=False):
r"""
@ -166,6 +161,9 @@ class A2f(object):
def get_mcmillan_tc(self, mustar):
"""
Computes the critical temperature with the McMillan equation and the input mustar.
Return:
Critical temperature Tc in Kelvin.
"""
tc = (self.omega_log / 1.2) * \
np.exp(-1.04 * (1.0 + self.lambda_iso) / (self.lambda_iso - mustar * (1.0 + 0.62 * self.lambda_iso)))
@ -173,7 +171,11 @@ class A2f(object):
def get_mustar_from_tc(self, tc):
"""
Return the value of mustar that gives the critical temperature tc in K in the McMillan equation.
Return the value of mustar that gives the critical temperature `tc` in the McMillan equation.
Args:
tc: Critical temperature in Kelvin.
"""
l = self.lambda_iso
num = l + (1.04 * (1 + l) / np.log(1.2 * abu.kb_eVK * tc / self.omega_log))
@ -383,6 +385,7 @@ class A2f(object):
Returns:
`matplotlib` figure
"""
# TODO: choose start and stop so as to avoid the singularity in the McMillan Tc.
mustar_values = np.linspace(start, stop, num=num)
tc_vals = [self.get_mcmillan_tc(mustar) for mustar in mustar_values]
@ -426,7 +429,6 @@ class A2Ftr(object):
raise ValueError("Cannot find zero in energy mesh")
# TODO Change name.
class EphFile(AbinitNcFile, Has_Structure, Has_ElectronBands, NotebookWriter):
"""
@ -471,9 +473,10 @@ class EphFile(AbinitNcFile, Has_Structure, Has_ElectronBands, NotebookWriter):
app("")
# E-PH section
app(marquee("E-PH calculation", mark="="))
app(self.a2f_qcoarse.to_string(title="A2f coarse:", verbose=verbose))
app("Has transport a2Ftr(w): %s" % self.has_a2ftr)
app(self.a2f_qcoarse.to_string(title="A2f(w) on the ab-initio q-mesh:", verbose=verbose))
app("")
app(self.a2f_qintp.to_string(title="A2f interpolated:", verbose=verbose))
app(self.a2f_qintp.to_string(title="A2f(w) interpolated on the dense q-mesh:", verbose=verbose))
return "\n".join(lines)

View File

@ -327,6 +327,55 @@ def flow_compare_hist(flow, nids=None, with_spglib=False, verbose=0,
return df
def flow_get_dims_dataframe(flow, nids=None, printout=False, with_colors=False):
"""
Analyze the output files produced by the Abinit tasks and build a pandas DataFrame
with the dimensions of the calculations.
Args:
flow: AbiPy flow.
nids: List of node identifiers. By default, all nodes are shown.
printout: True to print the dataframe.
with_colors: True if the task status should be colored.
"""
abo_paths, index, status, abo_relpaths, task_classes = [], [], [], [], []
for task in flow.iflat_tasks(nids=nids):
if task.status not in (flow.S_OK, flow.S_RUN): continue
#if not task.is_abinit_task: continue
abo_paths.append(task.output_file.path)
index.append(task.pos_str)
status.append(task.status.colored if with_colors else str(task.status))
abo_relpaths.append(os.path.relpath(task.output_file.relpath))
task_classes.append(task.__class__.__name__)
if not abo_paths: return
# Get dimensions from the output files. Files that cannot be parsed are skipped
# and removed from the parallel lists so that the DataFrame stays aligned.
rows, bad_positions = [], []
for ip, p in enumerate(abo_paths):
with abilab.AbinitOutputFile(p) as abo:
try:
dims_dataset, spg_dataset = abo.get_dims_spginfo_dataset()
except Exception as exc:
cprint("Exception while trying to get dimensions from %s\n%s" % (p, str(exc)), "yellow")
bad_positions.append(ip)
continue
rows.append(dims_dataset[1])
for ip in reversed(bad_positions):
for alist in (index, status, abo_relpaths, task_classes):
alist.pop(ip)
if not rows: return
import pandas as pd
df = pd.DataFrame(rows, index=index, columns=list(rows[0].keys()))
# Add columns to the dataframe.
df["task_class"] = task_classes
df["relpath"] = abo_relpaths
df["status"] = status
if printout:
abilab.print_dataframe(df, title="Table with Abinit dimensions:\n")
return df
def flow_compare_abivars(flow, varnames, nids=None, wslice=None, printout=False, with_colors=False):
"""
Print the input of the tasks to the given stream.
@ -527,8 +576,9 @@ Usage example:
abirun.py [FLOWDIR] abivars -vn ecut,nband => Print table with these input variables.
abirun.py [FLOWDIR] structures => Compare input/output structures of the tasks.
abirun.py [FLOWDIR] ebands => Print table with electronic properties.
abirun.py [FLOWDIR] hist => Print table with last iteratin in hist files.
abirun.py [FLOWDIR] hist => Print table with last iteration in hist files.
abirun.py [FLOWDIR] cycles => Print SCF cycles extracted from the output of the tasks.
abirun.py [FLOWDIR] dims => Print table with dimensions extracted from the output of the tasks.
abirun.py [FLOWDIR] inspect => Call matplotlib to inspect the tasks
abirun.py [FLOWDIR] tail => Use unix tail to follow the main output files of the flow.
abirun.py [FLOWDIR] deps => Show task dependencies.
@ -669,7 +719,7 @@ def get_parser(with_epilog=False):
# Subparser for scheduler command.
p_scheduler = subparsers.add_parser('scheduler', parents=[copts_parser],
help="Run all tasks with a Python scheduler. Requires scheduler.yml.")
help="Run all tasks with a Python scheduler. Requires scheduler.yml either in $PWD or ~/.abinit/abipy.")
p_scheduler.add_argument('-w', '--weeks', default=0, type=int, help="Number of weeks to wait.")
p_scheduler.add_argument('-d', '--days', default=0, type=int, help="Number of days to wait.")
p_scheduler.add_argument('-hs', '--hours', default=0, type=int, help="Number of hours to wait.")
@ -697,7 +747,7 @@ def get_parser(with_epilog=False):
# Subparser for cancel command.
p_cancel = subparsers.add_parser('cancel', parents=[copts_parser, flow_selector_parser],
help="Cancel the tasks in the queue. Not available if qtype == shell.")
help="Cancel the tasks in the queue. Not available if qtype is shell.")
p_cancel.add_argument("-r", "--rmtree", action="store_true", default=False, help="Remove flow directory.")
# Subparser for restart command.
@ -783,6 +833,10 @@ Specify the files to open. Possible choices:
p_cycles = subparsers.add_parser('cycles', parents=[copts_parser, flow_selector_parser],
help="Print SCF cycles extracted from the output of the tasks.")
# Subparser for dims.
p_dims = subparsers.add_parser('dims', parents=[copts_parser, flow_selector_parser],
help="Print table with dimensions extracted from the output of the tasks.")
# Subparser for inspect.
p_inspect = subparsers.add_parser('inspect', parents=[copts_parser, flow_selector_parser],
help="Call matplotlib to inspect the tasks (execute task.inspect method)")
@ -1294,6 +1348,10 @@ def main():
print(cycle)
print()
elif options.command == "dims":
flow_get_dims_dataframe(flow, nids=selected_nids(flow, options),
printout=True, with_colors=not options.no_colors)
elif options.command == "inspect":
tasks = flow.select_tasks(nids=options.nids, wslice=options.wslice)

View File

@ -95,6 +95,20 @@ def iflat(iterables):
yield it
def grouper(n, iterable, fillvalue=None):
"""
>>> assert grouper(3, "ABCDEFG", "x") == [("A", "B", "C"), ("D", "E", "F"), ("G", "x", "x")]
"""
# https://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks/434411#434411
try:
from itertools import zip_longest
except ImportError:
from itertools import izip_longest as zip_longest
args = [iter(iterable)] * n
return list(zip_longest(fillvalue=fillvalue, *args))
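# Used by get_dims_spginfo_dataset to pair "name value" tokens, e.g.:
#   grouper(2, "intxc 0 ionmov 0".split()) --> [("intxc", "0"), ("ionmov", "0")]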
#########################################################################################
# Sorting and ordering
#########################################################################################

View File

@ -347,7 +347,7 @@ class TestAbirun(ScriptTest):
# Test abirun commands requiring a flow (no submission)
for command in ["status", "debug", "debug_reset", "deps", "inputs", "corrections", "events",
"history", "handlers", "cancel", "tail", "inspect", "structures", "ebands", "hist",
"cycles", "tricky",]:
"cycles", "dims", "tricky",]:
r = env.run(self.script, flowdir, command, self.loglevel, self.verbose, *no_logo_colors,
expect_stderr=self.expect_stderr)
assert r.returncode == 0