mirror of https://github.com/abinit/abipy.git
Add output parser for abinit dimensions
commit 89645d377a (parent a6c1143771)
@@ -414,6 +414,7 @@ class AbinitOutputFile(AbinitTextFile, NotebookWriter):
        return self.to_string()

    def to_string(self, verbose=0):
        """String representation."""
        lines = ["ndtset: %d, completed: %s" % (self.ndtset, self.run_completed)]
        app = lines.append
@@ -446,6 +447,119 @@ class AbinitOutputFile(AbinitTextFile, NotebookWriter):
        return "\n".join(lines)

    def get_dims_spginfo_dataset(self, verbose=0):
        """
        Parse the section with the dimensions of the calculation.

        Args:
            verbose: Verbosity level.

        Return: (dims_dataset, spginfo_dataset)
            where dims_dataset[i] is an OrderedDict with the dimensions of dataset `i`
            and spginfo_dataset[i] is a dictionary with space group information.
        """
        # If we have a single dataset, we have to parse:
        #
        #  Symmetries : space group Fd -3 m (#227); Bravais cF (face-center cubic)
        # ================================================================================
        #  Values of the parameters that define the memory need of the present run
        #    intxc = 0  ionmov = 0  iscf = 7  lmnmax = 6
        #    lnmax = 6  mgfft = 18  mpssoang = 3  mqgrid = 3001
        #    natom = 2  nloc_mem = 1  nspden = 1  nspinor = 1
        #    nsppol = 1  nsym = 48  n1xccc = 2501  ntypat = 1
        #    occopt = 1  xclevel = 2
        # -  mband = 8  mffmem = 1  mkmem = 29
        #    mpw = 202  nfft = 5832  nkpt = 29
        # ================================================================================
        # P This job should need less than 3.389 Mbytes of memory.
        #   Rough estimation (10% accuracy) of disk space for files :
        # _ WF disk file : 0.717 Mbytes ; DEN or POT disk file : 0.046 Mbytes.
        # ================================================================================

        # If we have multiple datasets, we have to parse:
        #
        #  DATASET 2 : space group F-4 3 m (#216); Bravais cF (face-center cubic)
        # ================================================================================
        #  Values of the parameters that define the memory need for DATASET 2.
        #    intxc = 0  ionmov = 0  iscf = 7  lmnmax = 2
        #    lnmax = 2  mgfft = 12  mpssoang = 3  mqgrid = 3001
        #    natom = 2  nloc_mem = 1  nspden = 1  nspinor = 1
        #    nsppol = 1  nsym = 24  n1xccc = 2501  ntypat = 2
        #    occopt = 1  xclevel = 1
        # -  mband = 10  mffmem = 1  mkmem = 2
        #    mpw = 69  nfft = 1728  nkpt = 2
        # ================================================================================
        # P This job should need less than 1.331 Mbytes of memory.
        #   Rough estimation (10% accuracy) of disk space for files :
        # _ WF disk file : 0.023 Mbytes ; DEN or POT disk file : 0.015 Mbytes.
        # ================================================================================

        magic = "Values of the parameters that define the memory need"
        memory_pre = "P This job should need less than"
        magic_exit = "------------- Echo of variables that govern the present computation"
        filesizes_pre = "_ WF disk file :"

        def parse_spgline(line):
            """Parse the line with space group info, return dict."""
            # Could use regular expressions ...
            i = line.find("space group")
            spg_str, brav_str = line[i:].replace("space group", "").split(";")
            toks = spg_str.split()
            return {
                "spg_symbol": "".join(toks[:-1]),
                "spg_number": int(toks[-1].replace("(", "").replace(")", "").replace("#", "")),
                "bravais": brav_str.strip(),
            }

        from abipy.tools.numtools import grouper
        dims_dataset, spginfo_dataset = OrderedDict(), OrderedDict()
        inblock = 0
        with open(self.filepath, "rt") as fh:
            for line in fh:
                line = line.strip()
                if line.startswith(magic_exit):
                    break
                if not line or line.startswith("===") or line.startswith("---") or line.startswith("Rough estimation"):
                    continue

                if verbose: print("inblock:", inblock, " at line:", line)

                if line.startswith("DATASET") or line.startswith("Symmetries :"):
                    # Get dataset index, parse space group and lattice info, init new dims dict.
                    inblock = 1
                    if line.startswith("Symmetries :"):
                        # No multidataset
                        dtindex = 1
                    else:
                        tokens = line.split()
                        dtindex = int(tokens[1])

                    dims_dataset[dtindex] = dims = OrderedDict()
                    spginfo_dataset[dtindex] = parse_spgline(line)
                    continue

                if inblock == 1 and line.startswith(magic):
                    inblock = 2
                    continue

                if inblock == 2:
                    # Lines with data.
                    if line.startswith(memory_pre):
                        dims["mem_per_proc_mb"] = float(line.replace(memory_pre, "").split()[0])
                    elif line.startswith(filesizes_pre):
                        tokens = line.split()
                        mbpos = [i - 1 for i, t in enumerate(tokens) if t.startswith("Mbytes")]
                        assert len(mbpos) == 2
                        dims["wfk_size_mb"] = float(tokens[mbpos[0]])
                        dims["denpot_size_mb"] = float(tokens[mbpos[1]])
                    else:
                        if line and line[0] == "-": line = line[1:]
                        tokens = grouper(2, line.replace("=", "").split())
                        if verbose: print("tokens:", tokens)
                        dims.update([(t[0], int(t[1])) for t in tokens])

        return dims_dataset, spginfo_dataset

    def next_gs_scf_cycle(self):
        """
        Return the next :class:`GroundStateScfCycle` in the file. None if not found.
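For context (not part of the commit), a minimal usage sketch of the new parser; the output filename below is a placeholder, and the keys shown are the ones exercised in the tests further down:

    from abipy import abilab

    with abilab.AbinitOutputFile("run.abo") as abo:   # "run.abo" is a hypothetical path
        dims_dataset, spginfo_dataset = abo.get_dims_spginfo_dataset()
        dims, spginfo = dims_dataset[1], spginfo_dataset[1]
        # dims maps dimension names (natom, nkpt, mpw, ...) to integers plus the *_mb estimates,
        # spginfo holds "spg_symbol", "spg_number" and "bravais".
        print(dims["nkpt"], dims["mem_per_proc_mb"], spginfo["spg_symbol"])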
@@ -30,7 +30,6 @@ class AbinitOutputTest(AbipyTest):
        with AbinitOutputFile(abo_path) as abo:
            repr(abo); str(abo)
            assert abo.to_string(verbose=2)

            assert abo.version == "8.0.6"
            assert abo.run_completed
            assert not abo.dryrun_mode
@@ -43,6 +42,25 @@ class AbinitOutputTest(AbipyTest):
            assert abo.initial_structure == abo.final_structure
            abo.diff_datasets(1, 2, dryrun=True)

            # Test the parsing of dimensions and spginfo.
            dims_dataset, spginfo_dataset = abo.get_dims_spginfo_dataset()
            assert len(dims_dataset) == 2 and list(dims_dataset.keys()) == [1, 2]
            dims1 = dims_dataset[1]
            assert dims1["iscf"] == 7
            assert dims1["nfft"] == 5832
            self.assert_almost_equal(dims1["mem_per_proc_mb"], 3.045)
            self.assert_almost_equal(dims1["wfk_size_mb"], 0.717)
            self.assert_almost_equal(dims1["denpot_size_mb"], 0.046)
            assert spginfo_dataset[1]["spg_symbol"] == "Fd-3m"
            assert spginfo_dataset[1]["spg_number"] == 227
            assert spginfo_dataset[1]["bravais"] == "Bravais cF (face-center cubic)"
            dims2 = dims_dataset[2]
            assert dims2["iscf"] == -2
            assert dims2["n1xccc"] == 2501
            self.assert_almost_equal(dims2["mem_per_proc_mb"], 1.901)
            self.assert_almost_equal(dims2["wfk_size_mb"], 0.340)
            self.assert_almost_equal(dims2["denpot_size_mb"], 0.046)

            print(abo.events)
            gs_cycle = abo.next_gs_scf_cycle()
            assert gs_cycle is not None
@@ -107,6 +125,20 @@ class AbinitOutputTest(AbipyTest):

            assert abo.initial_structure.abi_spacegroup is not None

            # This tests get_dims_spginfo_dataset with a single dataset.
            dims_dataset, spg_dataset = abo.get_dims_spginfo_dataset()
            assert len(dims_dataset) == 1
            dims = dims_dataset[1]
            assert dims["nsppol"] == 1
            assert dims["nsym"] == 48
            assert dims["nkpt"] == 29
            self.assert_almost_equal(dims["mem_per_proc_mb"], 3.389)
            self.assert_almost_equal(dims["wfk_size_mb"], 0.717)
            self.assert_almost_equal(dims["denpot_size_mb"], 0.046)
            assert spg_dataset[1]["spg_symbol"] == "Fd-3m"
            assert spg_dataset[1]["spg_number"] == 227
            assert spg_dataset[1]["bravais"] == "Bravais cF (face-center cubic)"

    def test_all_outputs_in_tests(self):
        """
        Try to parse all Abinit output files inside the Abinit `tests` directory.
@@ -92,7 +92,7 @@ class A2f(object):
        # TODO: Add ElectronDos
        app("Isotropic lambda: %.3f" % (self.lambda_iso))
        app("Omega_log: %s [eV], %s [K]" % (self.omega_log, self.omega_log * abu.eV_to_K))
-       for mustar in (0.1, 0.2):
+       for mustar in (0.1, 0.12, 0.2):
            app("\tFor mustar %s: McMillan Tc: %s [K]" % (mustar, self.get_mcmillan_tc(mustar)))

        if verbose:
@@ -114,20 +114,15 @@ class A2f(object):
        """
        #return 270 / abu.eV_to_K
        iw = self.iw0 + 1
        #iw = self.iw0 + 100
        wmesh, a2fw = self.mesh[iw:], self.values[iw:]
        #wmesh = wmesh * units.eV_to_Ha
        #wmesh = wmesh * abu.eV_to_THz

        #ax, fig, plt = get_ax_fig_plt(ax=None)
        #ax.plot(wmesh, a2fw / wmesh * np.log(wmesh))
        #plt.show()

-       integral = simps(a2fw / wmesh * np.log(wmesh), x=wmesh)
-       #return np.exp(2.0 / self.lambda_iso * integral) * units.Ha_to_eV
-       #return np.exp(2.0 / self.lambda_iso * integral) / abu.eV_to_THz
-       return np.exp(2.0 / self.lambda_iso * integral) #/ abu.eV_to_THz
+       fw = a2fw / wmesh * np.log(wmesh)
+       integral = simps(fw, x=wmesh)
+       return np.exp(1.0 / self.lambda_iso * integral)

    def get_moment(self, n, spin=None, cumulative=False):
        r"""
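For orientation (a standard reference formula, not something this hunk asserts), the Allen-Dynes logarithmic average frequency is usually written as

    \omega_{\log} = \exp\left[ \frac{2}{\lambda} \int \frac{d\omega}{\omega}\, \alpha^2F(\omega)\, \ln\omega \right]

i.e. the integrand evaluated with simps above; the replacement lines change the prefactor applied to the integral from 2.0 / self.lambda_iso to 1.0 / self.lambda_iso.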
@@ -166,6 +161,9 @@ class A2f(object):
    def get_mcmillan_tc(self, mustar):
        """
        Computes the critical temperature with the McMillan equation and the input mustar.

        Return:
            Tc in Kelvin.
        """
        tc = (self.omega_log / 1.2) * \
            np.exp(-1.04 * (1.0 + self.lambda_iso) / (self.lambda_iso - mustar * (1.0 + 0.62 * self.lambda_iso)))
@@ -173,7 +171,11 @@ class A2f(object):
    def get_mustar_from_tc(self, tc):
        """
-       Return the value of mustar that gives the critical temperature tc in K in the McMillan equation.
+       Return the value of mustar that gives the critical temperature `tc` in the McMillan equation.

        Args:
            tc:
                Critical temperature in Kelvin
        """
        l = self.lambda_iso
        num = l + (1.04 * (1 + l) / np.log(1.2 * abu.kb_eVK * tc / self.omega_log))
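A quick consistency sketch (not part of the commit; it assumes `a2f` is an A2f instance, e.g. the a2f_qcoarse attribute of the EphFile shown below): the two methods are expected to be inverses of each other:

    for mustar in (0.1, 0.12, 0.2):
        tc = a2f.get_mcmillan_tc(mustar)                 # critical temperature in Kelvin
        assert abs(a2f.get_mustar_from_tc(tc) - mustar) < 1e-6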
@@ -383,6 +385,7 @@ class A2f(object):
        Returns:
            `matplotlib` figure
        """
        # TODO: choose start and stop to avoid the singularity in the McMillan Tc.
        mustar_values = np.linspace(start, stop, num=num)
        tc_vals = [self.get_mcmillan_tc(mustar) for mustar in mustar_values]
@@ -426,7 +429,6 @@ class A2Ftr(object):
            raise ValueError("Cannot find zero in energy mesh")


# TODO Change name.
class EphFile(AbinitNcFile, Has_Structure, Has_ElectronBands, NotebookWriter):
    """
|
|||
app("")
|
||||
# E-PH section
|
||||
app(marquee("E-PH calculation", mark="="))
|
||||
app(self.a2f_qcoarse.to_string(title="A2f coarse:", verbose=verbose))
|
||||
app("Has transport a2Ftr(w): %s" % self.has_a2ftr)
|
||||
app(self.a2f_qcoarse.to_string(title="A2f(w) on the ab-initio q-mesh:", verbose=verbose))
|
||||
app("")
|
||||
app(self.a2f_qintp.to_string(title="A2f interpolated:", verbose=verbose))
|
||||
app(self.a2f_qintp.to_string(title="A2f(w) interpolated on the dense q-mesh:", verbose=verbose))
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
|
|
@@ -327,6 +327,55 @@ def flow_compare_hist(flow, nids=None, with_spglib=False, verbose=0,
    return df


def flow_get_dims_dataframe(flow, nids=None, printout=False, with_colors=False):
    """
    Analyze output files produced by Abinit tasks. Print pandas DataFrame with dimensions.

    Args:
        nids: List of node identifiers. By default, all nodes are shown.
        printout: True to print dataframe.
        with_colors: True if task status should be colored.
    """
    abo_paths, index, status, abo_relpaths, task_classes = [], [], [], [], []

    for task in flow.iflat_tasks(nids=nids):
        if task.status not in (flow.S_OK, flow.S_RUN): continue
        #if not task.is_abinit_task: continue

        abo_paths.append(task.output_file.path)
        index.append(task.pos_str)
        status.append(task.status.colored if with_colors else str(task.status))
        abo_relpaths.append(os.path.relpath(task.output_file.relpath))
        task_classes.append(task.__class__.__name__)

    if not abo_paths: return

    # Get dimensions from output files.
    rows = []
    for p in abo_paths:
        print(p)
        with abilab.AbinitOutputFile(p) as abo:
            try:
                dims_dataset, spg_dataset = abo.get_dims_spginfo_dataset()
            except Exception as exc:
                cprint("Exception while trying to get dimensions from %s\n%s" % (p, str(exc)), "yellow")
                continue
            rows.append(dims_dataset[1])

    import pandas as pd
    df = pd.DataFrame(rows, index=index, columns=list(rows[0].keys()))

    # Add columns to the dataframe.
    status = [str(s) for s in status]
    df["task_class"] = task_classes
    df["relpath"] = abo_relpaths
    df["status"] = status

    if printout:
        abilab.print_dataframe(df, title="Table with Abinit dimensions:\n")

    return df


def flow_compare_abivars(flow, varnames, nids=None, wslice=None, printout=False, with_colors=False):
    """
    Print the input of the tasks to the given stream.
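For context (not part of the commit), a minimal sketch of how the helper is meant to be called; `flow` is assumed to be an already-built AbiPy flow with completed tasks, and the column selection is illustrative:

    df = flow_get_dims_dataframe(flow, printout=False)
    if df is not None:   # the helper returns None when no task is in the S_OK or S_RUN state
        print(df[["natom", "nkpt", "mpw", "mem_per_proc_mb", "task_class", "status"]])

The same table is available from the command line via the new `abirun.py FLOWDIR dims` command registered below.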
@@ -527,8 +576,9 @@ Usage example:
  abirun.py [FLOWDIR] abivars -vn ecut,nband  => Print table with these input variables.
  abirun.py [FLOWDIR] structures              => Compare input/output structures of the tasks.
  abirun.py [FLOWDIR] ebands                  => Print table with electronic properties.
- abirun.py [FLOWDIR] hist                    => Print table with last iteratin in hist files.
+ abirun.py [FLOWDIR] hist                    => Print table with last iteration in hist files.
  abirun.py [FLOWDIR] cycles                  => Print SCF cycles extracted from the output of the tasks.
+ abirun.py [FLOWDIR] dims                    => Print table with dimensions extracted from the output of the tasks.
  abirun.py [FLOWDIR] inspect                 => Call matplotlib to inspect the tasks
  abirun.py [FLOWDIR] tail                    => Use unix tail to follow the main output files of the flow.
  abirun.py [FLOWDIR] deps                    => Show task dependencies.
@@ -669,7 +719,7 @@ def get_parser(with_epilog=False):

    # Subparser for scheduler command.
    p_scheduler = subparsers.add_parser('scheduler', parents=[copts_parser],
-       help="Run all tasks with a Python scheduler. Requires scheduler.yml.")
+       help="Run all tasks with a Python scheduler. Requires scheduler.yml either in $PWD or ~/.abinit/abipy.")
    p_scheduler.add_argument('-w', '--weeks', default=0, type=int, help="Number of weeks to wait.")
    p_scheduler.add_argument('-d', '--days', default=0, type=int, help="Number of days to wait.")
    p_scheduler.add_argument('-hs', '--hours', default=0, type=int, help="Number of hours to wait.")
@@ -697,7 +747,7 @@ def get_parser(with_epilog=False):

    # Subparser for cancel command.
    p_cancel = subparsers.add_parser('cancel', parents=[copts_parser, flow_selector_parser],
-       help="Cancel the tasks in the queue. Not available if qtype == shell.")
+       help="Cancel the tasks in the queue. Not available if qtype is shell.")
    p_cancel.add_argument("-r", "--rmtree", action="store_true", default=False, help="Remove flow directory.")

    # Subparser for restart command.
@@ -783,6 +833,10 @@ Specify the files to open. Possible choices:
    p_cycles = subparsers.add_parser('cycles', parents=[copts_parser, flow_selector_parser],
        help="Print SCF cycles extracted from the output of the tasks.")

    # Subparser for dims.
    p_dims = subparsers.add_parser('dims', parents=[copts_parser, flow_selector_parser],
        help="Print table with dimensions extracted from the output of the tasks.")

    # Subparser for inspect.
    p_inspect = subparsers.add_parser('inspect', parents=[copts_parser, flow_selector_parser],
        help="Call matplotlib to inspect the tasks (execute task.inspect method)")
@@ -1294,6 +1348,10 @@ def main():
            print(cycle)
            print()

    elif options.command == "dims":
        flow_get_dims_dataframe(flow, nids=selected_nids(flow, options),
                                printout=True, with_colors=not options.no_colors)

    elif options.command == "inspect":
        tasks = flow.select_tasks(nids=options.nids, wslice=options.wslice)
@@ -95,6 +95,20 @@ def iflat(iterables):
            yield it


def grouper(n, iterable, fillvalue=None):
    """
    >>> assert grouper(3, "ABCDEFG", "x") == [("A", "B", "C"), ("D", "E", "F"), ("G", "x", "x")]
    """
    # https://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks/434411#434411
    try:
        from itertools import zip_longest
    except ImportError:
        from itertools import izip_longest as zip_longest

    args = [iter(iterable)] * n
    return list(zip_longest(fillvalue=fillvalue, *args))


#########################################################################################
# Sorting and ordering
#########################################################################################
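To illustrate the pairing logic that get_dims_spginfo_dataset builds on (a small, self-contained check using a dimension line copied from the sample output above):

    from abipy.tools.numtools import grouper

    line = "natom = 2 nloc_mem = 1 nspden = 1 nspinor = 1"
    pairs = grouper(2, line.replace("=", "").split())
    # -> [('natom', '2'), ('nloc_mem', '1'), ('nspden', '1'), ('nspinor', '1')]
    dims = {name: int(value) for name, value in pairs}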
@@ -347,7 +347,7 @@ class TestAbirun(ScriptTest):
        # Test abirun commands requiring a flow (no submission)
        for command in ["status", "debug", "debug_reset", "deps", "inputs", "corrections", "events",
                        "history", "handlers", "cancel", "tail", "inspect", "structures", "ebands", "hist",
-                       "cycles", "tricky",]:
+                       "cycles", "dims", "tricky",]:
            r = env.run(self.script, flowdir, command, self.loglevel, self.verbose, *no_logo_colors,
                        expect_stderr=self.expect_stderr)
            assert r.returncode == 0