[utils] Implement the llvm-locstats tool

The tool reports verbose output for the DWARF debug location coverage. For each variable or formal parameter DIE, llvm-locstats computes the percentage of the code section bytes where the DIE is in scope that are covered by a location description. The line 0 shows the number (and the percentage) of DIEs with no location information, while the line 100 shows the number (and the percentage) of DIEs where there is location information in all code section bytes (where the variable or parameter is in the scope). The line 50..59 shows the number (and the percentage) of DIEs whose location information covers between 50 and 59 percent of their scope. The tool will be very useful for tracking improvements regarding the "debugging optimized code" support within the LLVM ecosystem. Differential Revision: https://reviews.llvm.org/D66526 llvm-svn: 371520
2019-09-10 13:47:03 +00:00 · 2019-09-10 13:47:03 +00:00 · 54008972d1
parent 7dfd0fb7f1
commit 54008972d1
6 changed files with 315 additions and 0 deletions
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@ -1105,3 +1105,7 @@ if (LLVM_INCLUDE_BENCHMARKS)
  add_subdirectory(utils/benchmark)
  add_subdirectory(benchmarks)
 endif()
+
+# Pull in the llvm-locstats utility only when LLVM_INCLUDE_UTILS is enabled.
+if (LLVM_INCLUDE_UTILS)
+  add_subdirectory(utils/llvm-locstats)
+endif()
--- a/llvm/docs/CommandGuide/index.rst
+++ b/llvm/docs/CommandGuide/index.rst
@ -74,3 +74,4 @@ Developer Tools
   llvm-build
   llvm-exegesis
   llvm-pdbutil
+   llvm-locstats
--- a/llvm/docs/CommandGuide/llvm-locstats.rst
+++ b/llvm/docs/CommandGuide/llvm-locstats.rst
@ -0,0 +1,79 @@
+llvm-locstats - calculate statistics on DWARF debug location
+============================================================
+
+.. program:: llvm-locstats
+
+SYNOPSIS
+--------
+
+:program:`llvm-locstats` [*options*] [*filename*]
+
+DESCRIPTION
+-----------
+
+:program:`llvm-locstats` works like a wrapper around :program:`llvm-dwarfdump`.
+It parses the :program:`llvm-dwarfdump` statistics regarding debug location and
+pretty-prints them in a more human-readable way.
+
+The line 0% shows the number and the percentage of DIEs with no location
+information, while the line 100% shows the information for DIEs where there is
+location information in all code section bytes (where the variable or parameter
+is in the scope). The line 50-59% shows the number and the percentage of DIEs
+whose location information covers between 50 and 59 percent of the bytes in
+their scope.
+
+OPTIONS
+-------
+
+.. option:: -only-variables
+
+            Calculate the location statistics only for local variables.
+
+.. option:: -only-formal-parameters
+
+            Calculate the location statistics only for formal parameters.
+
+.. option:: -ignore-debug-entry-values
+
+            Ignore the location statistics on locations containing the
+            debug entry values DWARF operation.
+
+EXIT STATUS
+-----------
+
+:program:`llvm-locstats` returns 0 if the input file was parsed
+successfully. Otherwise, it returns 1.
+
+OUTPUT EXAMPLE
+--------------
+
+.. code-block:: none
+
+  =================================================
+            Debug Location Statistics
+  =================================================
+        cov%          samples       percentage(~)
+  -------------------------------------------------
+     0%                    1              16%
+     1-9%                  0               0%
+     10-19%                0               0%
+     20-29%                0               0%
+     30-39%                0               0%
+     40-49%                0               0%
+     50-59%                1              16%
+     60-69%                0               0%
+     70-79%                0               0%
+     80-89%                1              16%
+     90-99%                0               0%
+     100%                  3              50%
+  =================================================
+  -the number of debug variables processed: 6
+  -PC ranges covered: 81%
+  -------------------------------------------------
+  -total availability: 83%
+  =================================================
+
+SEE ALSO
+--------
+
+:manpage:`llvm-dwarfdump(1)`
--- a/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
+++ b/llvm/test/tools/llvm-dwarfdump/X86/locstats.ll
@ -1,6 +1,9 @@
 ; RUN: llc -debug-entry-values %s -o - -filetype=obj \
 ; RUN:   | llvm-dwarfdump -statistics - | FileCheck %s
 ;
+; Build the object file first; llvm-locstats reads it from disk, not from a pipe.
+; RUN: llc -debug-entry-values %s -o %t0.o -filetype=obj
+; RUN: llvm-locstats %t0.o | FileCheck %s --check-prefix=LOCSTATS
+;
 ; CHECK: "entry value scope bytes covered":5
 ; CHECK: "formal params scope bytes total":20
 ; CHECK: "formal params scope bytes covered":20
@ -84,6 +87,20 @@
 ; CHECK: "vars (excluding the debug entry values) with 90-99% of its scope covered":0
 ; CHECK: "vars (excluding the debug entry values) with 100% of its scope covered":1}
 ;
+; Test the llvm-locstats output.
+; LOCSTATS: 0% 1 16%
+; LOCSTATS: 1-9% 0 0%
+; LOCSTATS: 10-19% 0 0%
+; LOCSTATS: 20-29% 0 0%
+; LOCSTATS: 30-39% 0 0%
+; LOCSTATS: 40-49% 0 0%
+; LOCSTATS: 50-59% 1 16%
+; LOCSTATS: 60-69% 0 0%
+; LOCSTATS: 70-79% 0 0%
+; LOCSTATS: 80-89% 1 16%
+; LOCSTATS: 90-99% 0 0%
+; LOCSTATS: 100% 3 50%
+;
 ; The source code of the test case:
 ; extern void fn3(int *);
 ; extern void fn2 (int);
--- a/llvm/utils/llvm-locstats/CMakeLists.txt
+++ b/llvm/utils/llvm-locstats/CMakeLists.txt
@ -0,0 +1,6 @@
+# Stage the llvm-locstats Python script into the runtime output directory under
+# the extension-less name `llvm-locstats`. The target is part of ALL, so the
+# copy step runs on every build; copy_if_different keeps the staged file (and
+# its timestamp) untouched when the script has not changed.
+if (LLVM_BUILD_UTILS)
+  add_custom_target(llvm-locstats ALL
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+      "${LLVM_MAIN_SRC_DIR}/utils/llvm-locstats/llvm-locstats.py"
+      "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llvm-locstats"
+    COMMENT "Copying llvm-locstats into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}"
+    VERBATIM
+    )
+  set_target_properties(llvm-locstats PROPERTIES FOLDER "Utils")
+endif()
--- a/llvm/utils/llvm-locstats/llvm-locstats.py
+++ b/llvm/utils/llvm-locstats/llvm-locstats.py
@ -0,0 +1,208 @@
+#!/usr/bin/env python
+#
+# This is a tool that works like debug location coverage calculator.
+# It parses the llvm-dwarfdump --statistics output by reporting it
+# in a more human readable way.
+#
+
+from __future__ import print_function
+import argparse
+import os
+import sys
+from json import loads
+from math import ceil
+from subprocess import Popen, PIPE
+
+def coverage_buckets():
+  yield '0%'
+  yield '1-9%'
+  for start in range(10, 91, 10):
+    yield '{0}-{1}%'.format(start, start + 9)
+  yield '100%'
+
+def locstats_output(
+  variables_total,
+  variables_total_locstats,
+  variables_with_loc,
+  scope_bytes_covered,
+  scope_bytes_from_first_def,
+  variables_coverage_map
+  ):
+
+  pc_ranges_covered = int(ceil(scope_bytes_covered * 100.0)
+              / scope_bytes_from_first_def)
+  variables_coverage_per_map = {}
+  for cov_bucket in coverage_buckets():
+    variables_coverage_per_map[cov_bucket] = \
+      int(ceil(variables_coverage_map[cov_bucket] * 100.0) \
+               / variables_total_locstats)
+
+  print (' =================================================')
+  print ('            Debug Location Statistics       ')
+  print (' =================================================')
+  print ('     cov%          samples          percentage(~)  ')
+  print (' -------------------------------------------------')
+  for cov_bucket in coverage_buckets():
+    print ('   {0:6}        {1:8d}             {2:3d}%'. \
+      format(cov_bucket, variables_coverage_map[cov_bucket], \
+             variables_coverage_per_map[cov_bucket]))
+  print (' =================================================')
+  print (' -the number of debug variables processed: ' \
+    + str(variables_total_locstats))
+  print (' -PC ranges covered: ' + str(pc_ranges_covered) + '%')
+
+  # Only if we are processing all the variables output the total
+  # availability.
+  if variables_total and variables_with_loc:
+    total_availability = int(ceil(variables_with_loc * 100.0) \
+                                  / variables_total)
+    print (' -------------------------------------------------')
+    print (' -total availability: ' + str(total_availability) + '%')
+  print (' =================================================')
+
+def parse_program_args(parser):
+  parser.add_argument('-only-variables', action='store_true',
+            default=False,
+            help='calculate the location statistics only for '
+               'local variables'
+            )
+  parser.add_argument('-only-formal-parameters', action='store_true',
+            default=False,
+            help='calculate the location statistics only for '
+               'formal parameters'
+            )
+  parser.add_argument('-ignore-debug-entry-values', action='store_true',
+            default=False,
+            help='ignore the location statistics on locations with '
+               'entry values'
+            )
+  parser.add_argument('file_name', type=str, help='file to process')
+  return parser.parse_args()
+
+
+def Main():
+  parser = argparse.ArgumentParser()
+  results = parse_program_args(parser)
+
+  if len(sys.argv) < 2:
+    print ('error: Too few arguments.')
+    parser.print_help()
+    sys.exit(1)
+
+  if results.only_variables and results.only_formal_parameters:
+    print ('error: Please use just one only* option.')
+    parser.print_help()
+    sys.exit(1)
+
+  # These will be different due to different options enabled.
+  variables_total = None
+  variables_total_locstats = None
+  variables_with_loc = None
+  variables_scope_bytes_covered = None
+  variables_scope_bytes_from_first_def = None
+  variables_scope_bytes_entry_values = None
+  variables_coverage_map = {}
+  binary = results.file_name
+
+  # Get the directory of the LLVM tools.
+  llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), \
+                                    "llvm-dwarfdump")
+  # The statistics llvm-dwarfdump option.
+  llvm_dwarfdump_stats_opt = "--statistics"
+
+  subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary], \
+                  stdin=PIPE, stdout=PIPE, stderr=PIPE, \
+                  universal_newlines = True)
+  cmd_stdout, cmd_stderr = subproc.communicate()
+
+  # Get the JSON and parse it.
+  json_parsed = None
+
+  try:
+    json_parsed = loads(cmd_stdout)
+  except:
+    print ('error: No valid llvm-dwarfdump statistics found.')
+    sys.exit(1)
+
+  if results.only_variables:
+    # Read the JSON only for local variables.
+    variables_total_locstats = \
+      json_parsed['total vars procesed by location statistics']
+    variables_scope_bytes_covered = \
+      json_parsed['vars scope bytes covered']
+    variables_scope_bytes_from_first_def = \
+      json_parsed['vars scope bytes total']
+    if not results.ignore_debug_entry_values:
+      for cov_bucket in coverage_buckets():
+        cov_category = "vars with {} of its scope covered".format(cov_bucket)
+        variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+    else:
+      variables_scope_bytes_entry_values = \
+        json_parsed['vars entry value scope bytes covered']
+      variables_scope_bytes_covered = variables_scope_bytes_covered \
+         - variables_scope_bytes_entry_values
+      for cov_bucket in coverage_buckets():
+        cov_category = \
+          "vars (excluding the debug entry values) " \
+          "with {} of its scope covered".format(cov_bucket)
+        variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+  elif results.only_formal_parameters:
+    # Read the JSON only for formal parameters.
+    variables_total_locstats = \
+      json_parsed['total params procesed by location statistics']
+    variables_scope_bytes_covered = \
+      json_parsed['formal params scope bytes covered']
+    variables_scope_bytes_from_first_def = \
+      json_parsed['formal params scope bytes total']
+    if not results.ignore_debug_entry_values:
+      for cov_bucket in coverage_buckets():
+        cov_category = "params with {} of its scope covered".format(cov_bucket)
+        variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+    else:
+      variables_scope_bytes_entry_values = \
+        json_parsed['formal params entry value scope bytes covered']
+      variables_scope_bytes_covered = variables_scope_bytes_covered \
+        - variables_scope_bytes_entry_values
+      for cov_bucket in coverage_buckets():
+        cov_category = \
+          "params (excluding the debug entry values) " \
+          "with {} of its scope covered".format(cov_bucket)
+  else:
+    # Read the JSON for both local variables and formal parameters.
+    variables_total = \
+      json_parsed['source variables']
+    variables_with_loc = json_parsed['variables with location']
+    variables_total_locstats = \
+      json_parsed['total variables procesed by location statistics']
+    variables_scope_bytes_covered = \
+      json_parsed['scope bytes covered']
+    variables_scope_bytes_from_first_def = \
+      json_parsed['scope bytes total']
+    if not results.ignore_debug_entry_values:
+      for cov_bucket in coverage_buckets():
+        cov_category = "variables with {} of its scope covered". \
+                       format(cov_bucket)
+        variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+    else:
+      variables_scope_bytes_entry_values = \
+        json_parsed['entry value scope bytes covered']
+      variables_scope_bytes_covered = variables_scope_bytes_covered \
+        - variables_scope_bytes_entry_values
+      for cov_bucket in coverage_buckets():
+        cov_category = "variables (excluding the debug entry values) " \
+                       "with {} of its scope covered". format(cov_bucket)
+        variables_coverage_map[cov_bucket] = json_parsed[cov_category]
+
+  # Pretty print collected info.
+  locstats_output(
+    variables_total,
+    variables_total_locstats,
+    variables_with_loc,
+    variables_scope_bytes_covered,
+    variables_scope_bytes_from_first_def,
+    variables_coverage_map
+    )
+
+# Script entry point: run the tool, then exit with status 0. Main() itself
+# exits with status 1 on the errors it reports.
+if __name__ == '__main__':
+  Main()
+  sys.exit(0)