#!/usr/bin/env python
from __future__ import print_function, division, absolute_import #, unicode_literals
import sys
import os
# Set ABI_PSPDIR env variable to point to the absolute path of Pspdir
os.environ["ABI_PSPDIR"] = os.path.abspath(os.path.join(os.path.dirname(__file__), "Pspdir"))
import platform
import shutil
import tempfile
import json
from os.path import join as pj, abspath as absp, basename
from socket import gethostname
from warnings import warn
import logging
logger = logging.getLogger(__name__)
try:
from ConfigParser import SafeConfigParser, NoOptionError
except ImportError:
# The ConfigParser module has been renamed to configparser in Python 3
from configparser import ConfigParser as SafeConfigParser, NoOptionError
# Add the parent of [...]/abinit/tests to sys.path so that the `tests` package can be imported.
pack_dir, x = os.path.split(absp(__file__))
pack_dir, x = os.path.split(pack_dir)
sys.path.insert(0, pack_dir)
import tests
__version__ = "0.3"
__author__ = "Matteo Giantomassi"
_my_name = basename(__file__) + "-" + __version__
abitests = tests.abitests
abenv = tests.abenv
from pymods import termcolor
from pymods.testsuite import BuildEnvironment
from pymods.jobrunner import TimeBomb, JobRunner, OMPEnvironment
from pymods.tools import pprint_table
def lazy__str__(func):
    """Decorator for __str__ methods: the decorated method is replaced by a
    generic dump of the instance attributes, one "name : value" pair per line."""
    def oncall(*args, **kwargs):
        self = args[0]
        return "\n".join(str(k) + " : " + str(v) for (k, v) in self.__dict__.items())
    return oncall
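# Illustrative use of lazy__str__ (hypothetical class, not part of this module):
#
#   class Point(object):
#       def __init__(self):
#           self.x, self.y = 1, 2
#       @lazy__str__
#       def __str__(self): pass
#
#   print(Point())  # --> "x : 1" and "y : 2", one pair per line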
def _yesno2bool(string):
    """Convert a "yes"/"no" string (case-insensitive, quotes ignored) to a bool."""
    string = string.lower().strip().replace('"', "").replace("'", "")
    if string == "yes":
        return True
    elif string == "no":
        return False
    raise ValueError("Cannot interpret string: %s" % string)

def _str2list(string):
    """Split a comma-separated string into a list of stripped tokens (empty fields dropped)."""
    return [s.strip() for s in string.split(",") if s]
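# Illustrative parses using the helpers above:
#   _yesno2bool(' "Yes" ')  ->  True
#   _str2list("v1, v2,")    ->  ["v1", "v2"]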
class TestBot(object):
"""
This object drives the execution of the abinit automatic tests:
1) Read setup options from the file testbot.cfg uploaded by the master on the worker.
2) Initialize the job_runner and other objects used to run the tests.
3) run the tests (see run method), and return the number of tests that failed.
Step 1-2 are performed in the creation method.
"""
_attrbs = {
# name --> (default, parser, info)
# If default is None, the option must be specified.
"slavename" : (None, str, "Name of buildbot worker"),
"type" : ("", str, "'ref' if this worker is the reference worker where all tests should pass"),
# TODO: ncpus should be replaced by max_cpus for clarity reasons
"ncpus" : (None, int, "Max number of CPUs that can be used by TestBot"),
"max_gpus" : (0, int, "Max number of GPUs that can be used by TestBot"),
"mpi_prefix" : ("", str, "MPI runner"),
"mpirun_np" : ("", str, "String used to execute `mpirun -n#NUM`"),
"omp_num_threads" : (0, int, "Number of OpenMP threads. 0 if OpenMP should not be used"),
"enable_mpi" : (None, _yesno2bool, "yes if MPI is activated else no."),
"enable_openmp" : (None, _yesno2bool, "yes if OpenMP is activated else no."),
"poe" : ("", str, "This option is deprecated"),
"poe_args" : ("", str, "This option is deprecated"),
"with_tdirs" : ("", _str2list, "List of subsuites to include."),
"without_tdirs" : ("", _str2list, "List of subsuites to exclude."),
"timeout_time" : (900, float, "Timeout time in seconds."),
"cygwin_dir" : ("", str, "This option is deprecated"),
"runmode" : ("static", str, "'static to run all tests with 1 MPI proc and use np > 1 only for multiparallel tests'"),
"keywords" : ("", _str2list, "String with the keywords that should be selected/ignored."),
"etsf_check" : ("no", _yesno2bool, "yes to activate the validation of the netcdf files produced by Abinit."),
"verbose" : (0, int, "Verbosity level"),
"tmp_basedir" : ("", str, "temporary folder where the tests will be executed and copied back"),
"mpi_args" : ("", str, "args passed to the mpi command"),
"force_mpi" : ("no", _yesno2bool, "force usage of of mpirun_np prefix"),
}
@classmethod
def print_options(cls):
"""
Print the different options supported in testbot.cfg with a
brief description and the default value.
"""
for opt_name, t in cls._attrbs.items():
default, parser, info = t
print("#", info)
print(opt_name, "=", default)
print("# NB If default is None, the option must be specified.")
def __init__(self, testbot_cfg=None):
# Read the options specified in the testbot configuration file.
if testbot_cfg is None:
basedir, _ = os.path.split(absp(__file__))
testbot_cfg = pj(basedir, "testbot.cfg")
parser = SafeConfigParser()
parser.read(testbot_cfg)
attrs2read = [
"slavename",
"type",
"ncpus",
"mpi_prefix",
"mpirun_np",
"omp_num_threads",
"poe",
"poe_args",
"with_tdirs",
"without_tdirs",
"timeout_time",
"cygwin_dir",
"runmode",
"keywords",
"etsf_check",
"verbose",
"tmp_basedir",
"mpi_args",
"force_mpi",
]
for attr in attrs2read:
default, parse, info = TestBot._attrbs[attr]
try:
value = parser.get("testbot", attr)
except NoOptionError:
value = default
if value is None:
# Write out the cfg file and raise
for section in parser.sections():
print("[" + section + "]")
for opt in parser.options(section):
print(opt + " = " + parser.get(section, opt))
raise ValueError("Mandatory option %s is not declared" % attr)
self.__dict__[attr] = parse(value)
if self.with_tdirs and self.without_tdirs:
raise ValueError("with_tdirs and without_tdirs attribute are mutually exclusive")
# TODO: ncpus should be replaced by max_cpus for clarity reasons
self.max_cpus = self.ncpus
system, node, release, version, machine, processor = platform.uname()
print("Running on %s -- worker %s -- system %s -- max_cpus %s -- Python %s -- %s" % (
gethostname(), self.slavename, system, self.max_cpus, platform.python_version(), _my_name))
        # Set the logger level. Testbot has no command-line option for it,
        # so the level is hardcoded to ERROR.
        numeric_level = getattr(logging, "ERROR", None)
        if not isinstance(numeric_level, int):
            raise ValueError("Invalid log level: ERROR")
        logging.basicConfig(level=numeric_level)
# Read testfarm configuration file.
build_examples = abenv.apath_of(pj("config", "specs", "testfarm.conf"))
parser = SafeConfigParser()
parser.read(build_examples)
if self.slavename not in parser.sections():
raise ValueError("%s is not a valid buildbot worker." % self.slavename)
# TODO
# Consistency check
# d = self.__dict__
# for attr, (default, parse) in TestBot._attrbs.items():
# try:
# d[attr] = parser.get(self.slavename, attr)
# except NoOptionError:
# print "option %s is not declared" % attr
# #raise ValueError(err_msg)
# d[attr] = default
# if default is None:
# err_msg = "option %s is not declared" % attr
# raise ValueError(err_msg)
# d[attr] = parse(d[attr])
        # 2) Initialize the job_runner.
self.build_env = build_env = BuildEnvironment(os.curdir, cygwin_instdir=self.__dict__["cygwin_dir"])
self.build_env.set_buildbot_builder(self.slavename)
# TODO: These parameters should be passed to testbot.cfg
from tests.pymods.devtools import number_of_cpus, number_of_gpus
#max_cpus = max(1, number_of_cpus())
if "HAVE_GPU" not in self.build_env.defined_cppvars:
self.max_gpus = 0
else:
self.max_gpus = max(0, number_of_gpus())
        if build_env.has_bin("timeout") and self.timeout_time > 0:
            # We can run executables under the control of timeout.c
            timeout_path = build_env.path_of_bin("timeout")
            timebomb = TimeBomb(self.timeout_time, exec_path=timeout_path)
        else:
            warn("Cannot find or use the timeout executable at: %s" % build_env.path_of_bin("timeout"))
            timebomb = TimeBomb(self.timeout_time)
print("Initalizing JobRunner for sequential runs.")
self.seq_runner = JobRunner.sequential(timebomb=timebomb)
print(self.seq_runner)
if self.has_mpi:
print("Initalizing MPI JobRunner from self.__dict__")
self.mpi_runner = JobRunner.fromdict(self.__dict__, timebomb=timebomb)
print(self.mpi_runner)
if self.omp_num_threads > 0:
print("Initalizing OMP environment with omp_num_threads %d " % self.omp_num_threads)
omp_env = OMPEnvironment(OMP_NUM_THREADS=self.omp_num_threads)
self.seq_runner.set_ompenv(omp_env)
if self.has_mpi:
self.mpi_runner.set_ompenv(omp_env)
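        # Note: OMPEnvironment presumably just carries OMP_NUM_THREADS so that the
        # runners can export it in the environment of each test (see pymods.jobrunner).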
self.targz_fnames = []
print(self)
        # Initialize the table used to store the final results.
        # The table includes all the abinit tests (also those that will be skipped);
        # values are initialized with None.
        # FIXME
database = abitests.get_database()
res_table = database.init_result_table()
self.summary = TestBotSummary(res_table)
#print(self.summary)
def __str__(self):
"""String representation."""
lines = []
app = lines.append
for attr_name, t in self._attrbs.items():
default, parser, info = t
value = getattr(self, attr_name, "undefined")
app("%s = %s" % (attr_name, value))
return "\n".join(lines)
@property
def has_mpi(self):
"""True if we have the MPI runner"""
return bool(self.mpirun_np) or bool(self.poe)
@property
def has_openmp(self):
"""True if the tests must be executed with OpenMP threads."""
return self.omp_num_threads > 0
def run_tests_with_np(self, mpi_nprocs, suite_args=None, runmode="static"):
"""
Run the tests specified by suite_args, using mpi_nprocs MPI processors.
Returns: (nfailed, npassed, nexecuted)
"""
# Compute number of python processes, note that self.omp_num_threads might be zero.
py_nprocs = self.max_cpus // (mpi_nprocs * max(self.omp_num_threads, 1))
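        # Worked example (illustrative): max_cpus=8, mpi_nprocs=2, omp_num_threads=2
        # gives py_nprocs = 8 // (2 * 2) = 2, i.e. two tests run concurrently,
        # each using 2 MPI processes x 2 OpenMP threads.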
if py_nprocs < 1:
raise RuntimeError("py_nprocs = %s" % py_nprocs)
test_suite = abitests.select_tests(suite_args, keys=self.keywords, regenerate=False)
# Create workdir.
workdir_name = "TestBot_MPI" + str(mpi_nprocs)
if self.has_openmp:
workdir_name += "_OMP" + str(self.omp_num_threads)
if os.path.exists(workdir_name):
raise RuntimeError("%s already exists!" % workdir_name)
else:
os.mkdir(workdir_name)
if self.tmp_basedir:
workdir = os.path.join(tempfile.mkdtemp(dir=self.tmp_basedir), workdir_name)
else:
workdir = workdir_name
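        # At this point workdir is e.g. "TestBot_MPI2" for a pure-MPI run with two
        # processes, or "TestBot_MPI2_OMP4" with 4 OpenMP threads, possibly nested
        # below a fresh temporary directory when tmp_basedir is set.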
# Run the tests.
if self.has_openmp:
msg = "Running ntests = %s, MPI_nprocs = %s, OMP_nthreads %s, Max GPUs: %s, py_nprocs = %s..." % (
test_suite.full_length, mpi_nprocs, self.omp_num_threads, self.max_gpus, py_nprocs)
else:
msg = "Running ntests = %s, MPI_nprocs = %s, Max GPUs: %s py_nprocs = %s..." % (
test_suite.full_length, mpi_nprocs, self.max_gpus, py_nprocs)
print(msg)
job_runner = self.seq_runner
if mpi_nprocs > 1 or self.force_mpi:
job_runner = self.mpi_runner
results = test_suite.run_tests(self.build_env, workdir, job_runner,
mpi_nprocs=mpi_nprocs,
omp_nthreads=self.omp_num_threads,
max_cpus=self.max_cpus,
max_gpus=self.max_gpus,
py_nprocs=py_nprocs,
runmode=self.runmode,
verbose=self.verbose,
make_html_diff=1)
# Cannot use this option on the test farm because hdf5 is not thread/process-safe.
# See https://www.hdfgroup.org/hdf5-quest.html#tsafe
# etsf_check=self.etsf_check)
if results is None:
print("Test suite is empty, returning 0 0 0 ")
return 0, 0, 0
# Store the results in the summary table,
# taking into account that an input file might be executed multiple times
# with a different environment (MPI, OMP ...)
run_info = {}
# run_info = [self.build_env, workdir, runner, mpi_nprocs, py_nprocs]
self.summary.merge_results(test_suite, run_info)
# Push the location of the tarball file
self.targz_fnames.append(results.targz_fname)
if self.tmp_basedir:
for fn in ["results.tar.gz", "suite_report.html"]:
try:
shutil.copy2(os.path.join(workdir, fn), workdir_name)
                except Exception as exc:
                    print("Could not copy back file %s: %s" % (fn, exc))
return results.nfailed, results.npassed, results.nexecuted
def run(self):
"""
Run all the automatic tests depending on the environment and the options
specified in the testbot.cfg configuration file.
Return the number of failing tests (+ no. passed tests if this is the reference worker).
"""
# If with_tdirs and without_tdirs are not given => execute all tests.
# else create a list of strings with the suites that should be executed|excluded.
suite_args = None
# XG130410 Crude hack, to avoid paral and mpiio test directories
# in case of enable_mpi=no in config/specs/testfarm.conf
if not self.has_mpi:
suite_args = "paral- mpiio-".split()
if self.with_tdirs:
suite_args = self.with_tdirs
elif self.without_tdirs:
# Append "-" to the string to signal that the suite should be excluded.
suite_args = "- ".join([s for s in self.without_tdirs]) + "-"
suite_args = suite_args.split()
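            # e.g. without_tdirs = ["v1", "v2"] yields suite_args = ["v1-", "v2-"];
            # the trailing "-" marks a suite to be excluded.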
if self.runmode == "static":
# Old mode: run all available tests with 1 MPI proc here,
# then use MPI mode in mp_suites (paral/mpiio)
np_list = [2, 4, 10, 24, 64]
#nfailed, npassed, nexecuted = 0, 0 , 0
nfailed, npassed, nexecuted = self.run_tests_with_np(1, suite_args=suite_args, runmode=self.runmode)
else:
# New mode: run all available tests with 2 MPI procs here,
# then enter the mp_suites with np_list CPUs
np_list = [4, 10, 24, 64]
nfailed, npassed, nexecuted = self.run_tests_with_np(2, suite_args=suite_args, runmode=self.runmode)
if self.has_mpi:
# Run the parallel tests in the multi-parallel suites.
mp_suites = abitests.multi_parallel_suites()
suite_args = [suite.name for suite in mp_suites]
# Prune dirs.
if self.with_tdirs:
suite_args = [s for s in self.with_tdirs if s in suite_args]
elif self.without_tdirs:
suite_args = [s for s in suite_args if s not in self.without_tdirs]
for np in np_list:
if np > self.max_cpus or not suite_args:
print("Skipping tests with np: %s as max_cpus: %s" % (np, self.max_cpus))
continue
print("Running multi-parallel tests with %s MPI processors, suite_args %s" % (np, suite_args))
para_nfailed, para_npassed, para_nexec = self.run_tests_with_np(np, runmode="static", suite_args=suite_args)
# Accumulate the counters.
nfailed += para_nfailed
npassed += para_npassed
nexecuted += para_nexec
table = self.summary.to_table()
pprint_table(table)
self.summary.json_dump("testbot_summary.json")
        # Empty list of tests (usually due to the with_tdirs/without_tdirs options).
        # Create a file to signal this condition and return 0.
        if nexecuted == 0:
            print("No tests were executed")
with open("__emptylist__", "wt") as fh:
fh.write("nfailed = %d, npassed = %d, nexecuted = %d" % (nfailed, npassed, nexecuted))
return 0
        # The exit status depends on the builder type.
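        # Illustrative: with nfailed=3 and npassed=2 a "ref" worker returns 5,
        # since tests that only passed within relaxed tolerances also count as
        # errors there, while any other worker returns 3.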
if self.type == "ref":
# Reference worker --> all the tests must pass.
return nfailed + npassed
else:
return nfailed
def finalize(self):
"""
This piece of code has been extracted from analysis9
"""
fname = "testbot_summary.json"
with open(fname, "rt") as data_file:
d = json.load(data_file)
# FIXME What is this?
d['tag'] = sys.argv[1]
with open(fname, 'wt') as data_file:
json.dump(d, data_file)
        try:
            tests_status = dict(zip(d["summary_table"][0], d["summary_table"][1]))
            dashline = "=========================================================================="
            print(dashline)
            print("          Suite   #failed #passed #success #skip |    #CPU    #WALL")
            print(dashline)
            rtime = 0.0
            ttime = 0.0
            paral = ''
            mpiio = ''
            for t, s in sorted(tests_status.items()):
                kt = False
                for i in d[t].keys():
                    if d[t][i]['status'] != "skipped":
                        kt = True
                        rtime += d[t][i]['run_etime']
                        ttime += d[t][i]['tot_etime']
                if kt:
                    temp = ''.join('%5s |' % l for l in s.split('/'))
                    temp = '%15s | %10s %7.1f | %7.1f' % (t, temp, rtime, ttime)
                    # The paral and mpiio lines are printed at the bottom of the table.
                    if t == 'mpiio':
                        mpiio = temp
                    elif t == 'paral':
                        paral = temp
                    else:
                        print(temp)
                rtime = ttime = 0.0
            print(dashline)
            putline = 0
            if paral:
                print(paral)
                putline = 1
            if mpiio:
                print(mpiio)
                putline = 1
            if putline:
                print(dashline)
        except Exception:
            print("no results")
            sys.exit(99)
class TestBotSummary(object):
"""Stores the final results of the tests performed by TestBot."""
_possible_status = ["failed", "passed", "succeeded", "skipped"]
def __init__(self, res_table):
self.res_table = res_table
self.failed = []
self.passed = []
@lazy__str__
def __str__(self): pass
def _min_status(self, items):
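        # Illustrative: _min_status(["passed", "failed"]) returns "failed",
        # since "failed" comes first in _possible_status.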
indices = [self._possible_status.index(item) for item in items]
return self._possible_status[min(indices)]
def to_table(self):
"""
Return a table (list of lists whose elements are strings).
The table has the format:
- [ suite_name1, suite_name2, ... ]
- [ stats1, stats2, ... ]
where stats is given by "nfail/npass/nsucces"
"""
def stats2string(stats):
"helper function that returns (nfail/npass/nsucc/nskipped)"
return "/".join(str(stats[k]) for k in self._possible_status)
table = [self.suite_names()]
        row = []
        for suite_name in self:
            status, stats = self.status_of_suite(suite_name)
            row.append(stats2string(stats))
        table.append(row)
return table
def merge_results(self, test_suite, run_info):
# assert test_suite._executed
self.run_info = run_info
for test in test_suite:
d = self.res_table[test.suite_name][test.id]
if "status" not in d:
# Entry is not initialized. Save the status of this test.
d["status"] = test.status
d["number_of_runs"] = 1
d["run_etime"] = test.run_etime
d["tot_etime"] = test.tot_etime
else:
# Handle the case where we have executed the same test but with a different setup.
# Ordering is: failed < passed < succeeded
# hence a test is marked as "failed" if at least one run failed.
d["status"] = self._min_status([test.status, d["status"]])
d["number_of_runs"] += 1
d["run_etime"] += test.run_etime
d["tot_etime"] += test.tot_etime
# Store the full_id of the tests that (failed|passed).
for test in test_suite.failed_tests():
self.failed.append(test.full_id)
for test in test_suite.passed_tests():
self.passed.append(test.full_id)
def __iter__(self):
"""Iterate over the suite names in alphabetical order."""
for suite_name in self.suite_names():
yield suite_name
def suite_names(self):
"""List of suite names in alphabetical order."""
return sorted(list(self.res_table.keys()))
def status_of_suite(self, suite_name):
"""
Args:
suite_name: string with the name of the suite.
        Return: (suite_status, stats) where
            suite_status is one of the possible statuses in `_possible_status` and
            stats is a dictionary, e.g. {"failed": 1, "passed": 2, "succeeded": 0, "skipped": 0}
"""
# Initialize stats setting the keys to 0
stats = dict.fromkeys(self._possible_status, 0)
for test_id, d in self.res_table[suite_name].items():
if "status" not in d:
d["status"] = "skipped"
stats[d["status"]] += 1
for status in self._possible_status:
if stats[status] > 0:
suite_status = status
break
else:
# Ignore error if suite is empty else raise.
if not self.res_table[suite_name]:
suite_status = "succeeded"
else:
raise RuntimeError("[%s] Wrong list of status values!" % suite_name)
return suite_status, stats
def json_dump(self, fname):
"""
Save self.res_table, self.failed, self.passed in json format.
The file will be transferred from the buildbot worker to the buildbot master.
"""
# list of strings with the name of the tests that (failed|passed)
d = {}
d["failed"] = self.failed
d["passed"] = self.passed
d["summary_table"] = self.to_table()
for suite_name in self:
print("---", suite_name, self.res_table[suite_name])
if suite_name in d:
#raise KeyError("Cannot overwrite key %s" % suite_name)
print("Warning: About to overwrite key %s" % suite_name)
d[suite_name] = self.res_table[suite_name]
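        # Final payload layout (sketch):
        #   {"failed": [...], "passed": [...],
        #    "summary_table": [[suite names], [stats strings]],
        #    "<suite_name>": {test_id: {"status": ..., "run_etime": ..., ...}, ...}}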
with open(fname, "wt") as fh:
json.dump(d, fh)
def main():
if "--help" in sys.argv or "-h" in sys.argv:
# Print help and exit.
TestBot.print_options()
return 0
    # Configuration file (default location or from the command line).
    testbot_cfg = None
    if len(sys.argv) > 1 and not sys.argv[1].startswith("-"):
        testbot_cfg = sys.argv[1]
        print("Reading testbot configuration file from:", testbot_cfg)
# Disable colors
termcolor.enable(False)
testbot = TestBot(testbot_cfg)
if "--dry-run" in sys.argv or "-d" in sys.argv:
print("Running in dry-run mode, will return immediately.")
print(testbot)
return 0
return testbot.run()
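# Typical invocations (illustrative):
#   ./testbot.py                  # read testbot.cfg next to this script
#   ./testbot.py my_testbot.cfg   # explicit configuration file
#   ./testbot.py --dry-run        # print the parsed configuration and exit
#   ./testbot.py --help           # print the supported testbot.cfg options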
if __name__ == "__main__":
#import multiprocessing
#multiprocessing.set_start_method("fork") # Ensure compatibility on macOS/Linux
sys.exit(main())