[opt-viewer] Add progress indicators (PR33522)

Summary:
Provide feedback to users of opt-diff.py, opt-stats.py, and opt-viewer.py
on how many YAML files have been processed and how many HTML
files have been generated. This feedback is particularly helpful for
opt-viewer.py, which may take a long time to complete when given many
large YAML files as input.

The progress indicators use simple output such as the following:

```
Reading YAML files...
    9 of 1197
```
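
The counter stays on a single line because it is redrawn in place with a
carriage return rather than printed on a new line for each file. A minimal
standalone sketch of that display technique (the file count and the sleep are
placeholders for illustration, not part of the patch):

```python
import sys
import time

total = 1197  # placeholder for the number of YAML files
for current in range(1, total + 1):
    # '\r' moves the cursor back to the start of the line, so each update
    # overwrites the previous "N of M" instead of scrolling.
    sys.stdout.write('\r\t{} of {}'.format(current, total))
    sys.stdout.flush()
    time.sleep(0.001)  # stand-in for the work of parsing one YAML file
sys.stdout.write('\n')
```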

Test plan:
Run `utils/opt-viewer/opt-*.py` on CentOS and macOS machines, using
Python 3.4 and Python 2.7 respectively, and ensure the output is
formatted well on both.

Reviewers: anemet, davidxl

Reviewed By: anemet

Subscribers: simon.f.whittaker, llvm-commits

Differential Revision: https://reviews.llvm.org/D34735

llvm-svn: 306726
Author: Brian Gesiak
Date:   2017-06-29 18:56:25 +00:00
Commit: 5e0a9465c4 (parent: 804eb1fbab)

5 changed files with 130 additions and 46 deletions

utils/opt-viewer/opt-diff.py:

```diff
@@ -44,20 +44,21 @@ if __name__ == '__main__':
         default=cpu_count(),
         type=int,
         help='Max job count (defaults to %(default)s, the current CPU count)')
+    parser.add_argument(
+        '--no-progress-indicator',
+        '-n',
+        action='store_true',
+        default=False,
+        help='Do not display any indicator of how many YAML files were read.')
     parser.add_argument('--output', '-o', default='diff.opt.yaml')
     args = parser.parse_args()

-    if args.jobs == 1:
-        pmap = map
-    else:
-        pool = Pool(processes=args.jobs)
-        pmap = pool.map
-
     files1 = find_files(args.yaml_dir_or_file_1)
     files2 = find_files(args.yaml_dir_or_file_2)

-    all_remarks1, _, _ = optrecord.gather_results(pmap, files1)
-    all_remarks2, _, _ = optrecord.gather_results(pmap, files2)
+    print_progress = not args.no_progress_indicator
+    all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
+    all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)

     added = set(all_remarks2.values()) - set(all_remarks1.values())
     removed = set(all_remarks1.values()) - set(all_remarks2.values())
```

utils/opt-viewer/opt-stats.py:

```diff
@@ -22,15 +22,19 @@ if __name__ == '__main__':
         default=cpu_count(),
         type=int,
         help='Max job count (defaults to %(default)s, the current CPU count)')
+    parser.add_argument(
+        '--no-progress-indicator',
+        '-n',
+        action='store_true',
+        default=False,
+        help='Do not display any indicator of how many YAML files were read.')
     args = parser.parse_args()

-    if args.jobs == 1:
-        pmap = map
-    else:
-        pool = Pool(processes=args.jobs)
-        pmap = pool.map
-
-    all_remarks, file_remarks, _ = optrecord.gather_results(pmap, args.yaml_files)
+    print_progress = not args.no_progress_indicator
+    all_remarks, file_remarks, _ = optrecord.gather_results(
+        args.yaml_files, args.jobs, print_progress)
+    if print_progress:
+        print('\n')

     bypass = defaultdict(int)
     byname = defaultdict(int)
```

utils/opt-viewer/opt-viewer.py:

```diff
@@ -2,24 +2,28 @@
 from __future__ import print_function

+import argparse
+import cgi
+import errno
+import functools
+from multiprocessing import cpu_count
+import os.path
+import re
+import shutil
+
+from pygments import highlight
+from pygments.lexers.c_cpp import CppLexer
+from pygments.formatters import HtmlFormatter
+
+import optpmap
+import optrecord
+
+
 desc = '''Generate HTML output to visualize optimization records from the YAML files
 generated with -fsave-optimization-record and -fdiagnostics-show-hotness.

 The tools requires PyYAML and Pygments Python packages.'''

-import optrecord
-import functools
-from multiprocessing import Pool
-from multiprocessing import Lock, cpu_count
-import errno
-import argparse
-import os.path
-import re
-import shutil
-from pygments import highlight
-from pygments.lexers.c_cpp import CppLexer
-from pygments.formatters import HtmlFormatter
-import cgi


 # This allows passing the global context to the child processes.
 class Context:
@@ -177,7 +181,13 @@ def map_remarks(all_remarks):
             context.caller_loc[caller] = arg['DebugLoc']


-def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, should_display_hotness):
+def generate_report(all_remarks,
+                    file_remarks,
+                    source_dir,
+                    output_dir,
+                    should_display_hotness,
+                    num_jobs,
+                    should_print_progress):
     try:
         os.makedirs(output_dir)
     except OSError as e:
@@ -187,7 +197,12 @@ def generate_report(pmap, all_remarks, file_remarks, source_dir, output_dir, sho
             raise

     _render_file_bound = functools.partial(_render_file, source_dir, output_dir, context)
-    pmap(_render_file_bound, file_remarks.items())
+    if should_print_progress:
+        print('Rendering HTML files...')
+    optpmap.pmap(_render_file_bound,
+                 file_remarks.items(),
+                 num_jobs,
+                 should_print_progress)

     if should_display_hotness:
         sorted_remarks = sorted(optrecord.itervalues(all_remarks), key=lambda r: (r.Hotness, r.File, r.Line, r.Column, r.PassWithDiffPrefix, r.yaml_tag, r.Function), reverse=True)
@@ -220,16 +235,25 @@ if __name__ == '__main__':
         '-s',
         default='',
         help='set source directory')
+    parser.add_argument(
+        '--no-progress-indicator',
+        '-n',
+        action='store_true',
+        default=False,
+        help='Do not display any indicator of how many YAML files were read '
+             'or rendered into HTML.')
     args = parser.parse_args()

-    if args.jobs == 1:
-        pmap = map
-    else:
-        pool = Pool(processes=args.jobs)
-        pmap = pool.map
-
-    all_remarks, file_remarks, should_display_hotness = optrecord.gather_results(pmap, args.yaml_files)
+    print_progress = not args.no_progress_indicator
+    all_remarks, file_remarks, should_display_hotness = \
+        optrecord.gather_results(args.yaml_files, args.jobs, print_progress)

     map_remarks(all_remarks)

-    generate_report(pmap, all_remarks, file_remarks, args.source_dir, args.output_dir, should_display_hotness)
+    generate_report(all_remarks,
+                    file_remarks,
+                    args.source_dir,
+                    args.output_dir,
+                    should_display_hotness,
+                    args.jobs,
+                    print_progress)
```

utils/opt-viewer/optpmap.py (new file):

```diff
@@ -0,0 +1,53 @@
+import sys
+import multiprocessing
+
+
+_current = None
+_total = None
+
+
+def _init(current, total):
+    global _current
+    global _total
+    _current = current
+    _total = total
+
+
+def _wrapped_func(func_and_args):
+    func, argument, should_print_progress = func_and_args
+    if should_print_progress:
+        with _current.get_lock():
+            _current.value += 1
+        sys.stdout.write('\r\t{} of {}'.format(_current.value, _total.value))
+
+    return func(argument)
+
+
+def pmap(func, iterable, processes, should_print_progress, *args, **kwargs):
+    """
+    A parallel map function that reports on its progress.
+
+    Applies `func` to every item of `iterable` and return a list of the
+    results. If `processes` is greater than one, a process pool is used to run
+    the functions in parallel. `should_print_progress` is a boolean value that
+    indicates whether a string 'N of M' should be printed to indicate how many
+    of the functions have finished being run.
+    """
+    global _current
+    global _total
+    _current = multiprocessing.Value('i', 0)
+    _total = multiprocessing.Value('i', len(iterable))
+
+    func_and_args = [(func, arg, should_print_progress,) for arg in iterable]
+    if processes <= 1:
+        result = map(_wrapped_func, func_and_args, *args, **kwargs)
+    else:
+        pool = multiprocessing.Pool(initializer=_init,
+                                    initargs=(_current, _total,),
+                                    processes=processes)
+        result = pool.map(_wrapped_func, func_and_args, *args, **kwargs)
+
+    if should_print_progress:
+        sys.stdout.write('\r')
+
+    return result
```
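
As a usage sketch, a caller hands `pmap` a worker function, an iterable, a job
count, and the progress flag. The `square` worker and inputs below are
placeholders for illustration; the real callers are `generate_report` above
and `optrecord.gather_results` below.

```python
import optpmap


def square(n):
    # Stand-in for a real worker such as optrecord.get_remarks.
    return n * n


if __name__ == '__main__':
    inputs = list(range(100))
    # Displays '\t<N> of 100' as items finish, then returns the results.
    results = optpmap.pmap(square, inputs, 4, should_print_progress=True)
    print(sum(results))
```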

utils/opt-viewer/optrecord.py:

```diff
@@ -10,15 +10,14 @@ except ImportError:
     print("For faster parsing, you may want to install libYAML for PyYAML")
     from yaml import Loader

-import functools
-from collections import defaultdict
-import itertools
-from multiprocessing import Pool
-from multiprocessing import Lock, cpu_count
 import cgi
+from collections import defaultdict
+import functools
+from multiprocessing import Lock
 import subprocess
-import traceback
+
+import optpmap


 p = subprocess.Popen(['c++filt', '-n'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
 p_lock = Lock()
@@ -210,8 +209,11 @@ def get_remarks(input_file):
     return max_hotness, all_remarks, file_remarks


-def gather_results(pmap, filenames):
-    remarks = pmap(get_remarks, filenames)
+def gather_results(filenames, num_jobs, should_print_progress):
+    if should_print_progress:
+        print('Reading YAML files...')
+    remarks = optpmap.pmap(
+        get_remarks, filenames, num_jobs, should_print_progress)
     max_hotness = max(entry[0] for entry in remarks)

     def merge_file_remarks(file_remarks_job, all_remarks, merged):
```